{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Vanilla Policy Gradient (VPG) to Play CartPole-v0\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import imp\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "torch.manual_seed(0)\n",
    "\n",
    "# Reload the logging module before configuring it -- presumably so that\n",
    "# basicConfig() takes effect even if logging was already set up in this kernel.\n",
    "# NOTE(review): `imp` is deprecated; `importlib.reload` is the modern equivalent.\n",
    "imp.reload(logging)\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout,\n",
    "    level=logging.DEBUG,\n",
    "    datefmt='%H:%M:%S',\n",
    "    format='%(asctime)s [%(levelname)s] %(message)s',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:53:56 [INFO] env: <CartPoleEnv<CartPole-v0>>\n",
      "22:53:56 [INFO] action_space: Discrete(2)\n",
      "22:53:56 [INFO] observation_space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n",
      "22:53:56 [INFO] reward_range: (-inf, inf)\n",
      "22:53:56 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "22:53:56 [INFO] _max_episode_steps: 200\n",
      "22:53:56 [INFO] _elapsed_steps: None\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('CartPole-v0')\n",
    "env.seed(0)  # seed the environment\n",
    "# Log every instance attribute of the environment object.\n",
    "for key, value in vars(env).items():\n",
    "    logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class VPGAgent:\n",
    "    \"\"\"Vanilla policy gradient (VPG) agent for a discrete action space.\n",
    "\n",
    "    The policy is a single linear layer followed by a softmax over actions.\n",
    "    In 'train' mode every call to step() is recorded, and close() runs one\n",
    "    gradient update on the recorded trajectory.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        \"\"\"Build the policy network and optimizer from the env's spaces.\n",
    "\n",
    "        Args:\n",
    "            env: object exposing `action_space.n` and\n",
    "                `observation_space.shape`.\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99  # discount factor\n",
    "        # Defined up front so step()/close() are safe before reset().\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        self.policy_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[],\n",
    "                output_size=self.action_n, output_activator=nn.Softmax(1))\n",
    "        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=0.005)\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size,\n",
    "            output_activator=None, use_bias=False):\n",
    "        \"\"\"Build a fully connected network.\n",
    "\n",
    "        Args:\n",
    "            input_size: number of input features.\n",
    "            hidden_sizes: sequence of hidden layer widths (may be empty).\n",
    "            output_size: number of output features.\n",
    "            output_activator: optional module appended after the last layer.\n",
    "            use_bias: whether the linear layers have a bias term.\n",
    "\n",
    "        Returns:\n",
    "            nn.Sequential of linear layers separated by ReLU, followed by\n",
    "            `output_activator` when one is given.\n",
    "        \"\"\"\n",
    "        sizes = [input_size] + list(hidden_sizes) + [output_size]\n",
    "        layers = []\n",
    "        for in_features, out_features in zip(sizes[:-1], sizes[1:]):\n",
    "            layers.append(nn.Linear(in_features, out_features, bias=use_bias))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers = layers[:-1]  # no ReLU after the output layer\n",
    "        if output_activator is not None:\n",
    "            layers.append(output_activator)\n",
    "        return nn.Sequential(*layers)\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Start a new episode; in 'train' mode clear the recorded trajectory.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Sample an action from the policy for `observation`.\n",
    "\n",
    "        In 'train' mode the tuple (observation, reward, done, action) is\n",
    "        appended, flattened, to the trajectory.\n",
    "\n",
    "        Returns:\n",
    "            The sampled action index.\n",
    "        \"\"\"\n",
    "        state_tensor = torch.as_tensor(observation, dtype=torch.float).unsqueeze(0)\n",
    "        # Acting needs no gradients: learn() recomputes the probabilities.\n",
    "        with torch.no_grad():\n",
    "            prob_tensor = self.policy_net(state_tensor)\n",
    "        action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "        action = action_tensor.numpy()[0]\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Finish the episode; in 'train' mode update the policy.\"\"\"\n",
    "        if self.mode == 'train':\n",
    "            self.learn()\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Run one policy-gradient update on the recorded trajectory.\n",
    "\n",
    "        Does nothing when no step has been recorded.\n",
    "        \"\"\"\n",
    "        if not self.trajectory:\n",
    "            return  # nothing recorded, nothing to learn from\n",
    "\n",
    "        # The trajectory is flat: [obs, reward, done, action, obs, reward, ...].\n",
    "        # Stack observations first: building a tensor from a list of arrays is slow.\n",
    "        state_tensor = torch.as_tensor(np.stack(self.trajectory[0::4]),\n",
    "                dtype=torch.float)\n",
    "        reward_tensor = torch.as_tensor(self.trajectory[1::4], dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(self.trajectory[3::4], dtype=torch.long)\n",
    "\n",
    "        # Entry t is sum_{k >= t} gamma^k * reward_k (reverse cumulative sum).\n",
    "        # NOTE(review): reward_t is the reward passed to step() together with\n",
    "        # observation t, so it is counted in the return at t -- confirm this\n",
    "        # matches the episode loop's convention.\n",
    "        arange_tensor = torch.arange(state_tensor.shape[0], dtype=torch.float)\n",
    "        discount_tensor = self.gamma ** arange_tensor\n",
    "        discounted_reward_tensor = discount_tensor * reward_tensor\n",
    "        discounted_return_tensor = discounted_reward_tensor.flip(0).cumsum(0).flip(0)\n",
    "\n",
    "        all_pi_tensor = self.policy_net(state_tensor)\n",
    "        pi_tensor = torch.gather(all_pi_tensor, 1,\n",
    "                action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        # Clamp so that log() never sees a zero probability.\n",
    "        log_pi_tensor = torch.log(torch.clamp(pi_tensor, 1e-6, 1.))\n",
    "        loss_tensor = -(discounted_return_tensor * log_pi_tensor).mean()\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "\n",
    "# Create the agent; its network sizes are read from env's observation and action spaces.\n",
    "agent = VPGAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:53:56 [INFO] ==== train ====\n",
      "22:53:56 [DEBUG] train episode 0: reward = 72.00, steps = 72\n",
      "22:53:56 [DEBUG] train episode 1: reward = 50.00, steps = 50\n",
      "22:53:56 [DEBUG] train episode 2: reward = 40.00, steps = 40\n",
      "22:53:56 [DEBUG] train episode 3: reward = 46.00, steps = 46\n",
      "22:53:56 [DEBUG] train episode 4: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 5: reward = 128.00, steps = 128\n",
      "22:53:56 [DEBUG] train episode 6: reward = 74.00, steps = 74\n",
      "22:53:56 [DEBUG] train episode 7: reward = 53.00, steps = 53\n",
      "22:53:56 [DEBUG] train episode 8: reward = 29.00, steps = 29\n",
      "22:53:56 [DEBUG] train episode 9: reward = 44.00, steps = 44\n",
      "22:53:56 [DEBUG] train episode 10: reward = 75.00, steps = 75\n",
      "22:53:56 [DEBUG] train episode 11: reward = 34.00, steps = 34\n",
      "22:53:56 [DEBUG] train episode 12: reward = 69.00, steps = 69\n",
      "22:53:56 [DEBUG] train episode 13: reward = 18.00, steps = 18\n",
      "22:53:56 [DEBUG] train episode 14: reward = 19.00, steps = 19\n",
      "22:53:56 [DEBUG] train episode 15: reward = 11.00, steps = 11\n",
      "22:53:56 [DEBUG] train episode 16: reward = 19.00, steps = 19\n",
      "22:53:56 [DEBUG] train episode 17: reward = 27.00, steps = 27\n",
      "22:53:56 [DEBUG] train episode 18: reward = 17.00, steps = 17\n",
      "22:53:56 [DEBUG] train episode 19: reward = 47.00, steps = 47\n",
      "22:53:56 [DEBUG] train episode 20: reward = 36.00, steps = 36\n",
      "22:53:56 [DEBUG] train episode 21: reward = 42.00, steps = 42\n",
      "22:53:56 [DEBUG] train episode 22: reward = 52.00, steps = 52\n",
      "22:53:56 [DEBUG] train episode 23: reward = 16.00, steps = 16\n",
      "22:53:56 [DEBUG] train episode 24: reward = 26.00, steps = 26\n",
      "22:53:56 [DEBUG] train episode 25: reward = 50.00, steps = 50\n",
      "22:53:56 [DEBUG] train episode 26: reward = 20.00, steps = 20\n",
      "22:53:56 [DEBUG] train episode 27: reward = 36.00, steps = 36\n",
      "22:53:56 [DEBUG] train episode 28: reward = 22.00, steps = 22\n",
      "22:53:56 [DEBUG] train episode 29: reward = 76.00, steps = 76\n",
      "22:53:56 [DEBUG] train episode 30: reward = 12.00, steps = 12\n",
      "22:53:56 [DEBUG] train episode 31: reward = 26.00, steps = 26\n",
      "22:53:56 [DEBUG] train episode 32: reward = 84.00, steps = 84\n",
      "22:53:56 [DEBUG] train episode 33: reward = 48.00, steps = 48\n",
      "22:53:56 [DEBUG] train episode 34: reward = 18.00, steps = 18\n",
      "22:53:56 [DEBUG] train episode 35: reward = 55.00, steps = 55\n",
      "22:53:56 [DEBUG] train episode 36: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 37: reward = 23.00, steps = 23\n",
      "22:53:56 [DEBUG] train episode 38: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 39: reward = 21.00, steps = 21\n",
      "22:53:56 [DEBUG] train episode 40: reward = 21.00, steps = 21\n",
      "22:53:56 [DEBUG] train episode 41: reward = 55.00, steps = 55\n",
      "22:53:56 [DEBUG] train episode 42: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 43: reward = 78.00, steps = 78\n",
      "22:53:57 [DEBUG] train episode 44: reward = 30.00, steps = 30\n",
      "22:53:57 [DEBUG] train episode 45: reward = 20.00, steps = 20\n",
      "22:53:57 [DEBUG] train episode 46: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 47: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 48: reward = 24.00, steps = 24\n",
      "22:53:57 [DEBUG] train episode 49: reward = 15.00, steps = 15\n",
      "22:53:57 [DEBUG] train episode 50: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 51: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 52: reward = 55.00, steps = 55\n",
      "22:53:57 [DEBUG] train episode 53: reward = 24.00, steps = 24\n",
      "22:53:57 [DEBUG] train episode 54: reward = 14.00, steps = 14\n",
      "22:53:57 [DEBUG] train episode 55: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 56: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 57: reward = 50.00, steps = 50\n",
      "22:53:57 [DEBUG] train episode 58: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 59: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 60: reward = 31.00, steps = 31\n",
      "22:53:57 [DEBUG] train episode 61: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 62: reward = 86.00, steps = 86\n",
      "22:53:57 [DEBUG] train episode 63: reward = 41.00, steps = 41\n",
      "22:53:57 [DEBUG] train episode 64: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 65: reward = 57.00, steps = 57\n",
      "22:53:57 [DEBUG] train episode 66: reward = 51.00, steps = 51\n",
      "22:53:57 [DEBUG] train episode 67: reward = 76.00, steps = 76\n",
      "22:53:57 [DEBUG] train episode 68: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 69: reward = 38.00, steps = 38\n",
      "22:53:57 [DEBUG] train episode 70: reward = 31.00, steps = 31\n",
      "22:53:57 [DEBUG] train episode 71: reward = 60.00, steps = 60\n",
      "22:53:57 [DEBUG] train episode 72: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 73: reward = 105.00, steps = 105\n",
      "22:53:57 [DEBUG] train episode 74: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 75: reward = 37.00, steps = 37\n",
      "22:53:57 [DEBUG] train episode 76: reward = 29.00, steps = 29\n",
      "22:53:57 [DEBUG] train episode 77: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 78: reward = 56.00, steps = 56\n",
      "22:53:57 [DEBUG] train episode 79: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 80: reward = 90.00, steps = 90\n",
      "22:53:57 [DEBUG] train episode 81: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 82: reward = 34.00, steps = 34\n",
      "22:53:57 [DEBUG] train episode 83: reward = 62.00, steps = 62\n",
      "22:53:57 [DEBUG] train episode 84: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 85: reward = 65.00, steps = 65\n",
      "22:53:57 [DEBUG] train episode 86: reward = 66.00, steps = 66\n",
      "22:53:57 [DEBUG] train episode 87: reward = 53.00, steps = 53\n",
      "22:53:57 [DEBUG] train episode 88: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 89: reward = 57.00, steps = 57\n",
      "22:53:57 [DEBUG] train episode 90: reward = 89.00, steps = 89\n",
      "22:53:57 [DEBUG] train episode 91: reward = 65.00, steps = 65\n",
      "22:53:57 [DEBUG] train episode 92: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 93: reward = 29.00, steps = 29\n",
      "22:53:57 [DEBUG] train episode 94: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 95: reward = 53.00, steps = 53\n",
      "22:53:57 [DEBUG] train episode 96: reward = 109.00, steps = 109\n",
      "22:53:57 [DEBUG] train episode 97: reward = 32.00, steps = 32\n",
      "22:53:57 [DEBUG] train episode 98: reward = 74.00, steps = 74\n",
      "22:53:57 [DEBUG] train episode 99: reward = 11.00, steps = 11\n",
      "22:53:57 [DEBUG] train episode 100: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 101: reward = 32.00, steps = 32\n",
      "22:53:57 [DEBUG] train episode 102: reward = 18.00, steps = 18\n",
      "22:53:57 [DEBUG] train episode 103: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 104: reward = 46.00, steps = 46\n",
      "22:53:57 [DEBUG] train episode 105: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 106: reward = 79.00, steps = 79\n",
      "22:53:58 [DEBUG] train episode 107: reward = 66.00, steps = 66\n",
      "22:53:58 [DEBUG] train episode 108: reward = 35.00, steps = 35\n",
      "22:53:58 [DEBUG] train episode 109: reward = 36.00, steps = 36\n",
      "22:53:58 [DEBUG] train episode 110: reward = 58.00, steps = 58\n",
      "22:53:58 [DEBUG] train episode 111: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 112: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 113: reward = 21.00, steps = 21\n",
      "22:53:58 [DEBUG] train episode 114: reward = 46.00, steps = 46\n",
      "22:53:58 [DEBUG] train episode 115: reward = 42.00, steps = 42\n",
      "22:53:58 [DEBUG] train episode 116: reward = 64.00, steps = 64\n",
      "22:53:58 [DEBUG] train episode 117: reward = 86.00, steps = 86\n",
      "22:53:58 [DEBUG] train episode 118: reward = 55.00, steps = 55\n",
      "22:53:58 [DEBUG] train episode 119: reward = 64.00, steps = 64\n",
      "22:53:58 [DEBUG] train episode 120: reward = 53.00, steps = 53\n",
      "22:53:58 [DEBUG] train episode 121: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 122: reward = 18.00, steps = 18\n",
      "22:53:58 [DEBUG] train episode 123: reward = 58.00, steps = 58\n",
      "22:53:58 [DEBUG] train episode 124: reward = 22.00, steps = 22\n",
      "22:53:58 [DEBUG] train episode 125: reward = 78.00, steps = 78\n",
      "22:53:58 [DEBUG] train episode 126: reward = 21.00, steps = 21\n",
      "22:53:58 [DEBUG] train episode 127: reward = 77.00, steps = 77\n",
      "22:53:58 [DEBUG] train episode 128: reward = 59.00, steps = 59\n",
      "22:53:58 [DEBUG] train episode 129: reward = 27.00, steps = 27\n",
      "22:53:58 [DEBUG] train episode 130: reward = 91.00, steps = 91\n",
      "22:53:58 [DEBUG] train episode 131: reward = 74.00, steps = 74\n",
      "22:53:58 [DEBUG] train episode 132: reward = 57.00, steps = 57\n",
      "22:53:58 [DEBUG] train episode 133: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 134: reward = 19.00, steps = 19\n",
      "22:53:58 [DEBUG] train episode 135: reward = 105.00, steps = 105\n",
      "22:53:58 [DEBUG] train episode 136: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 137: reward = 39.00, steps = 39\n",
      "22:53:58 [DEBUG] train episode 138: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 139: reward = 45.00, steps = 45\n",
      "22:53:58 [DEBUG] train episode 140: reward = 48.00, steps = 48\n",
      "22:53:58 [DEBUG] train episode 141: reward = 44.00, steps = 44\n",
      "22:53:58 [DEBUG] train episode 142: reward = 88.00, steps = 88\n",
      "22:53:58 [DEBUG] train episode 143: reward = 32.00, steps = 32\n",
      "22:53:58 [DEBUG] train episode 144: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 145: reward = 34.00, steps = 34\n",
      "22:53:58 [DEBUG] train episode 146: reward = 46.00, steps = 46\n",
      "22:53:58 [DEBUG] train episode 147: reward = 45.00, steps = 45\n",
      "22:53:58 [DEBUG] train episode 148: reward = 38.00, steps = 38\n",
      "22:53:58 [DEBUG] train episode 149: reward = 31.00, steps = 31\n",
      "22:53:58 [DEBUG] train episode 150: reward = 47.00, steps = 47\n",
      "22:53:58 [DEBUG] train episode 151: reward = 76.00, steps = 76\n",
      "22:53:58 [DEBUG] train episode 152: reward = 27.00, steps = 27\n",
      "22:53:58 [DEBUG] train episode 153: reward = 30.00, steps = 30\n",
      "22:53:58 [DEBUG] train episode 154: reward = 118.00, steps = 118\n",
      "22:53:58 [DEBUG] train episode 155: reward = 97.00, steps = 97\n",
      "22:53:58 [DEBUG] train episode 156: reward = 130.00, steps = 130\n",
      "22:53:58 [DEBUG] train episode 157: reward = 35.00, steps = 35\n",
      "22:53:59 [DEBUG] train episode 158: reward = 112.00, steps = 112\n",
      "22:53:59 [DEBUG] train episode 159: reward = 18.00, steps = 18\n",
      "22:53:59 [DEBUG] train episode 160: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 161: reward = 62.00, steps = 62\n",
      "22:53:59 [DEBUG] train episode 162: reward = 47.00, steps = 47\n",
      "22:53:59 [DEBUG] train episode 163: reward = 87.00, steps = 87\n",
      "22:53:59 [DEBUG] train episode 164: reward = 123.00, steps = 123\n",
      "22:53:59 [DEBUG] train episode 165: reward = 35.00, steps = 35\n",
      "22:53:59 [DEBUG] train episode 166: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 167: reward = 92.00, steps = 92\n",
      "22:53:59 [DEBUG] train episode 168: reward = 13.00, steps = 13\n",
      "22:53:59 [DEBUG] train episode 169: reward = 33.00, steps = 33\n",
      "22:53:59 [DEBUG] train episode 170: reward = 20.00, steps = 20\n",
      "22:53:59 [DEBUG] train episode 171: reward = 19.00, steps = 19\n",
      "22:53:59 [DEBUG] train episode 172: reward = 80.00, steps = 80\n",
      "22:53:59 [DEBUG] train episode 173: reward = 31.00, steps = 31\n",
      "22:53:59 [DEBUG] train episode 174: reward = 32.00, steps = 32\n",
      "22:53:59 [DEBUG] train episode 175: reward = 28.00, steps = 28\n",
      "22:53:59 [DEBUG] train episode 176: reward = 63.00, steps = 63\n",
      "22:53:59 [DEBUG] train episode 177: reward = 13.00, steps = 13\n",
      "22:53:59 [DEBUG] train episode 178: reward = 36.00, steps = 36\n",
      "22:53:59 [DEBUG] train episode 179: reward = 74.00, steps = 74\n",
      "22:53:59 [DEBUG] train episode 180: reward = 51.00, steps = 51\n",
      "22:53:59 [DEBUG] train episode 181: reward = 17.00, steps = 17\n",
      "22:53:59 [DEBUG] train episode 182: reward = 39.00, steps = 39\n",
      "22:53:59 [DEBUG] train episode 183: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 184: reward = 26.00, steps = 26\n",
      "22:53:59 [DEBUG] train episode 185: reward = 56.00, steps = 56\n",
      "22:53:59 [DEBUG] train episode 186: reward = 79.00, steps = 79\n",
      "22:53:59 [DEBUG] train episode 187: reward = 40.00, steps = 40\n",
      "22:53:59 [DEBUG] train episode 188: reward = 11.00, steps = 11\n",
      "22:53:59 [DEBUG] train episode 189: reward = 23.00, steps = 23\n",
      "22:53:59 [DEBUG] train episode 190: reward = 22.00, steps = 22\n",
      "22:53:59 [DEBUG] train episode 191: reward = 82.00, steps = 82\n",
      "22:53:59 [DEBUG] train episode 192: reward = 48.00, steps = 48\n",
      "22:53:59 [DEBUG] train episode 193: reward = 23.00, steps = 23\n",
      "22:53:59 [DEBUG] train episode 194: reward = 110.00, steps = 110\n",
      "22:53:59 [DEBUG] train episode 195: reward = 32.00, steps = 32\n",
      "22:53:59 [DEBUG] train episode 196: reward = 67.00, steps = 67\n",
      "22:53:59 [DEBUG] train episode 197: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 198: reward = 54.00, steps = 54\n",
      "22:53:59 [DEBUG] train episode 199: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 200: reward = 28.00, steps = 28\n",
      "22:53:59 [DEBUG] train episode 201: reward = 126.00, steps = 126\n",
      "22:53:59 [DEBUG] train episode 202: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 203: reward = 78.00, steps = 78\n",
      "22:53:59 [DEBUG] train episode 204: reward = 27.00, steps = 27\n",
      "22:53:59 [DEBUG] train episode 205: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 206: reward = 29.00, steps = 29\n",
      "22:53:59 [DEBUG] train episode 207: reward = 75.00, steps = 75\n",
      "22:53:59 [DEBUG] train episode 208: reward = 44.00, steps = 44\n",
      "22:53:59 [DEBUG] train episode 209: reward = 34.00, steps = 34\n",
      "22:53:59 [DEBUG] train episode 210: reward = 52.00, steps = 52\n",
      "22:53:59 [DEBUG] train episode 211: reward = 62.00, steps = 62\n",
      "22:54:00 [DEBUG] train episode 212: reward = 97.00, steps = 97\n",
      "22:54:00 [DEBUG] train episode 213: reward = 67.00, steps = 67\n",
      "22:54:00 [DEBUG] train episode 214: reward = 29.00, steps = 29\n",
      "22:54:00 [DEBUG] train episode 215: reward = 65.00, steps = 65\n",
      "22:54:00 [DEBUG] train episode 216: reward = 32.00, steps = 32\n",
      "22:54:00 [DEBUG] train episode 217: reward = 44.00, steps = 44\n",
      "22:54:00 [DEBUG] train episode 218: reward = 58.00, steps = 58\n",
      "22:54:00 [DEBUG] train episode 219: reward = 47.00, steps = 47\n",
      "22:54:00 [DEBUG] train episode 220: reward = 76.00, steps = 76\n",
      "22:54:00 [DEBUG] train episode 221: reward = 68.00, steps = 68\n",
      "22:54:00 [DEBUG] train episode 222: reward = 27.00, steps = 27\n",
      "22:54:00 [DEBUG] train episode 223: reward = 106.00, steps = 106\n",
      "22:54:00 [DEBUG] train episode 224: reward = 96.00, steps = 96\n",
      "22:54:00 [DEBUG] train episode 225: reward = 74.00, steps = 74\n",
      "22:54:00 [DEBUG] train episode 226: reward = 91.00, steps = 91\n",
      "22:54:00 [DEBUG] train episode 227: reward = 80.00, steps = 80\n",
      "22:54:00 [DEBUG] train episode 228: reward = 42.00, steps = 42\n",
      "22:54:00 [DEBUG] train episode 229: reward = 31.00, steps = 31\n",
      "22:54:00 [DEBUG] train episode 230: reward = 46.00, steps = 46\n",
      "22:54:00 [DEBUG] train episode 231: reward = 44.00, steps = 44\n",
      "22:54:00 [DEBUG] train episode 232: reward = 86.00, steps = 86\n",
      "22:54:00 [DEBUG] train episode 233: reward = 69.00, steps = 69\n",
      "22:54:00 [DEBUG] train episode 234: reward = 26.00, steps = 26\n",
      "22:54:00 [DEBUG] train episode 235: reward = 40.00, steps = 40\n",
      "22:54:00 [DEBUG] train episode 236: reward = 42.00, steps = 42\n",
      "22:54:00 [DEBUG] train episode 237: reward = 43.00, steps = 43\n",
      "22:54:00 [DEBUG] train episode 238: reward = 39.00, steps = 39\n",
      "22:54:00 [DEBUG] train episode 239: reward = 76.00, steps = 76\n",
      "22:54:00 [DEBUG] train episode 240: reward = 122.00, steps = 122\n",
      "22:54:00 [DEBUG] train episode 241: reward = 105.00, steps = 105\n",
      "22:54:00 [DEBUG] train episode 242: reward = 104.00, steps = 104\n",
      "22:54:00 [DEBUG] train episode 243: reward = 32.00, steps = 32\n",
      "22:54:00 [DEBUG] train episode 244: reward = 104.00, steps = 104\n",
      "22:54:00 [DEBUG] train episode 245: reward = 79.00, steps = 79\n",
      "22:54:00 [DEBUG] train episode 246: reward = 43.00, steps = 43\n",
      "22:54:00 [DEBUG] train episode 247: reward = 26.00, steps = 26\n",
      "22:54:00 [DEBUG] train episode 248: reward = 29.00, steps = 29\n",
      "22:54:00 [DEBUG] train episode 249: reward = 22.00, steps = 22\n",
      "22:54:00 [DEBUG] train episode 250: reward = 35.00, steps = 35\n",
      "22:54:00 [DEBUG] train episode 251: reward = 67.00, steps = 67\n",
      "22:54:00 [DEBUG] train episode 252: reward = 47.00, steps = 47\n",
      "22:54:00 [DEBUG] train episode 253: reward = 51.00, steps = 51\n",
      "22:54:00 [DEBUG] train episode 254: reward = 30.00, steps = 30\n",
      "22:54:00 [DEBUG] train episode 255: reward = 111.00, steps = 111\n",
      "22:54:01 [DEBUG] train episode 256: reward = 94.00, steps = 94\n",
      "22:54:01 [DEBUG] train episode 257: reward = 31.00, steps = 31\n",
      "22:54:01 [DEBUG] train episode 258: reward = 65.00, steps = 65\n",
      "22:54:01 [DEBUG] train episode 259: reward = 31.00, steps = 31\n",
      "22:54:01 [DEBUG] train episode 260: reward = 56.00, steps = 56\n",
      "22:54:01 [DEBUG] train episode 261: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 262: reward = 38.00, steps = 38\n",
      "22:54:01 [DEBUG] train episode 263: reward = 47.00, steps = 47\n",
      "22:54:01 [DEBUG] train episode 264: reward = 107.00, steps = 107\n",
      "22:54:01 [DEBUG] train episode 265: reward = 106.00, steps = 106\n",
      "22:54:01 [DEBUG] train episode 266: reward = 118.00, steps = 118\n",
      "22:54:01 [DEBUG] train episode 267: reward = 171.00, steps = 171\n",
      "22:54:01 [DEBUG] train episode 268: reward = 63.00, steps = 63\n",
      "22:54:01 [DEBUG] train episode 269: reward = 27.00, steps = 27\n",
      "22:54:01 [DEBUG] train episode 270: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 271: reward = 24.00, steps = 24\n",
      "22:54:01 [DEBUG] train episode 272: reward = 43.00, steps = 43\n",
      "22:54:01 [DEBUG] train episode 273: reward = 29.00, steps = 29\n",
      "22:54:01 [DEBUG] train episode 274: reward = 57.00, steps = 57\n",
      "22:54:01 [DEBUG] train episode 275: reward = 67.00, steps = 67\n",
      "22:54:01 [DEBUG] train episode 276: reward = 57.00, steps = 57\n",
      "22:54:01 [DEBUG] train episode 277: reward = 42.00, steps = 42\n",
      "22:54:01 [DEBUG] train episode 278: reward = 32.00, steps = 32\n",
      "22:54:01 [DEBUG] train episode 279: reward = 48.00, steps = 48\n",
      "22:54:01 [DEBUG] train episode 280: reward = 62.00, steps = 62\n",
      "22:54:01 [DEBUG] train episode 281: reward = 62.00, steps = 62\n",
      "22:54:01 [DEBUG] train episode 282: reward = 112.00, steps = 112\n",
      "22:54:01 [DEBUG] train episode 283: reward = 88.00, steps = 88\n",
      "22:54:01 [DEBUG] train episode 284: reward = 19.00, steps = 19\n",
      "22:54:01 [DEBUG] train episode 285: reward = 59.00, steps = 59\n",
      "22:54:01 [DEBUG] train episode 286: reward = 39.00, steps = 39\n",
      "22:54:01 [DEBUG] train episode 287: reward = 50.00, steps = 50\n",
      "22:54:01 [DEBUG] train episode 288: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 289: reward = 85.00, steps = 85\n",
      "22:54:01 [DEBUG] train episode 290: reward = 32.00, steps = 32\n",
      "22:54:01 [DEBUG] train episode 291: reward = 24.00, steps = 24\n",
      "22:54:01 [DEBUG] train episode 292: reward = 68.00, steps = 68\n",
      "22:54:01 [DEBUG] train episode 293: reward = 103.00, steps = 103\n",
      "22:54:01 [DEBUG] train episode 294: reward = 23.00, steps = 23\n",
      "22:54:01 [DEBUG] train episode 295: reward = 48.00, steps = 48\n",
      "22:54:01 [DEBUG] train episode 296: reward = 28.00, steps = 28\n",
      "22:54:01 [DEBUG] train episode 297: reward = 54.00, steps = 54\n",
      "22:54:01 [DEBUG] train episode 298: reward = 178.00, steps = 178\n",
      "22:54:01 [DEBUG] train episode 299: reward = 114.00, steps = 114\n",
      "22:54:01 [DEBUG] train episode 300: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 301: reward = 39.00, steps = 39\n",
      "22:54:02 [DEBUG] train episode 302: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 303: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 304: reward = 51.00, steps = 51\n",
      "22:54:02 [DEBUG] train episode 305: reward = 152.00, steps = 152\n",
      "22:54:02 [DEBUG] train episode 306: reward = 27.00, steps = 27\n",
      "22:54:02 [DEBUG] train episode 307: reward = 26.00, steps = 26\n",
      "22:54:02 [DEBUG] train episode 308: reward = 103.00, steps = 103\n",
      "22:54:02 [DEBUG] train episode 309: reward = 58.00, steps = 58\n",
      "22:54:02 [DEBUG] train episode 310: reward = 108.00, steps = 108\n",
      "22:54:02 [DEBUG] train episode 311: reward = 62.00, steps = 62\n",
      "22:54:02 [DEBUG] train episode 312: reward = 36.00, steps = 36\n",
      "22:54:02 [DEBUG] train episode 313: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 314: reward = 96.00, steps = 96\n",
      "22:54:02 [DEBUG] train episode 315: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 316: reward = 53.00, steps = 53\n",
      "22:54:02 [DEBUG] train episode 317: reward = 66.00, steps = 66\n",
      "22:54:02 [DEBUG] train episode 318: reward = 31.00, steps = 31\n",
      "22:54:02 [DEBUG] train episode 319: reward = 76.00, steps = 76\n",
      "22:54:02 [DEBUG] train episode 320: reward = 55.00, steps = 55\n",
      "22:54:02 [DEBUG] train episode 321: reward = 75.00, steps = 75\n",
      "22:54:02 [DEBUG] train episode 322: reward = 54.00, steps = 54\n",
      "22:54:02 [DEBUG] train episode 323: reward = 26.00, steps = 26\n",
      "22:54:02 [DEBUG] train episode 324: reward = 95.00, steps = 95\n",
      "22:54:02 [DEBUG] train episode 325: reward = 34.00, steps = 34\n",
      "22:54:02 [DEBUG] train episode 326: reward = 92.00, steps = 92\n",
      "22:54:02 [DEBUG] train episode 327: reward = 85.00, steps = 85\n",
      "22:54:02 [DEBUG] train episode 328: reward = 63.00, steps = 63\n",
      "22:54:02 [DEBUG] train episode 329: reward = 80.00, steps = 80\n",
      "22:54:02 [DEBUG] train episode 330: reward = 14.00, steps = 14\n",
      "22:54:02 [DEBUG] train episode 331: reward = 29.00, steps = 29\n",
      "22:54:02 [DEBUG] train episode 332: reward = 62.00, steps = 62\n",
      "22:54:02 [DEBUG] train episode 333: reward = 41.00, steps = 41\n",
      "22:54:02 [DEBUG] train episode 334: reward = 192.00, steps = 192\n",
      "22:54:02 [DEBUG] train episode 335: reward = 96.00, steps = 96\n",
      "22:54:02 [DEBUG] train episode 336: reward = 63.00, steps = 63\n",
      "22:54:02 [DEBUG] train episode 337: reward = 53.00, steps = 53\n",
      "22:54:02 [DEBUG] train episode 338: reward = 86.00, steps = 86\n",
      "22:54:02 [DEBUG] train episode 339: reward = 78.00, steps = 78\n",
      "22:54:02 [DEBUG] train episode 340: reward = 74.00, steps = 74\n",
      "22:54:02 [DEBUG] train episode 341: reward = 23.00, steps = 23\n",
      "22:54:02 [DEBUG] train episode 342: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 343: reward = 72.00, steps = 72\n",
      "22:54:02 [DEBUG] train episode 344: reward = 27.00, steps = 27\n",
      "22:54:02 [DEBUG] train episode 345: reward = 57.00, steps = 57\n",
      "22:54:02 [DEBUG] train episode 346: reward = 28.00, steps = 28\n",
      "22:54:02 [DEBUG] train episode 347: reward = 30.00, steps = 30\n",
      "22:54:03 [DEBUG] train episode 348: reward = 78.00, steps = 78\n",
      "22:54:03 [DEBUG] train episode 349: reward = 29.00, steps = 29\n",
      "22:54:03 [DEBUG] train episode 350: reward = 12.00, steps = 12\n",
      "22:54:03 [DEBUG] train episode 351: reward = 200.00, steps = 200\n",
      "22:54:03 [DEBUG] train episode 352: reward = 61.00, steps = 61\n",
      "22:54:03 [DEBUG] train episode 353: reward = 54.00, steps = 54\n",
      "22:54:03 [DEBUG] train episode 354: reward = 156.00, steps = 156\n",
      "22:54:03 [DEBUG] train episode 355: reward = 41.00, steps = 41\n",
      "22:54:03 [DEBUG] train episode 356: reward = 47.00, steps = 47\n",
      "22:54:03 [DEBUG] train episode 357: reward = 61.00, steps = 61\n",
      "22:54:03 [DEBUG] train episode 358: reward = 25.00, steps = 25\n",
      "22:54:03 [DEBUG] train episode 359: reward = 30.00, steps = 30\n",
      "22:54:03 [DEBUG] train episode 360: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 361: reward = 64.00, steps = 64\n",
      "22:54:03 [DEBUG] train episode 362: reward = 174.00, steps = 174\n",
      "22:54:03 [DEBUG] train episode 363: reward = 70.00, steps = 70\n",
      "22:54:03 [DEBUG] train episode 364: reward = 93.00, steps = 93\n",
      "22:54:03 [DEBUG] train episode 365: reward = 128.00, steps = 128\n",
      "22:54:03 [DEBUG] train episode 366: reward = 80.00, steps = 80\n",
      "22:54:03 [DEBUG] train episode 367: reward = 36.00, steps = 36\n",
      "22:54:03 [DEBUG] train episode 368: reward = 49.00, steps = 49\n",
      "22:54:03 [DEBUG] train episode 369: reward = 49.00, steps = 49\n",
      "22:54:03 [DEBUG] train episode 370: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 371: reward = 69.00, steps = 69\n",
      "22:54:03 [DEBUG] train episode 372: reward = 92.00, steps = 92\n",
      "22:54:03 [DEBUG] train episode 373: reward = 79.00, steps = 79\n",
      "22:54:03 [DEBUG] train episode 374: reward = 74.00, steps = 74\n",
      "22:54:03 [DEBUG] train episode 375: reward = 165.00, steps = 165\n",
      "22:54:03 [DEBUG] train episode 376: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 377: reward = 84.00, steps = 84\n",
      "22:54:03 [DEBUG] train episode 378: reward = 53.00, steps = 53\n",
      "22:54:03 [DEBUG] train episode 379: reward = 35.00, steps = 35\n",
      "22:54:03 [DEBUG] train episode 380: reward = 35.00, steps = 35\n",
      "22:54:03 [DEBUG] train episode 381: reward = 67.00, steps = 67\n",
      "22:54:03 [DEBUG] train episode 382: reward = 84.00, steps = 84\n",
      "22:54:03 [DEBUG] train episode 383: reward = 98.00, steps = 98\n",
      "22:54:03 [DEBUG] train episode 384: reward = 73.00, steps = 73\n",
      "22:54:03 [DEBUG] train episode 385: reward = 74.00, steps = 74\n",
      "22:54:03 [DEBUG] train episode 386: reward = 41.00, steps = 41\n",
      "22:54:03 [DEBUG] train episode 387: reward = 68.00, steps = 68\n",
      "22:54:03 [DEBUG] train episode 388: reward = 47.00, steps = 47\n",
      "22:54:04 [DEBUG] train episode 389: reward = 49.00, steps = 49\n",
      "22:54:04 [DEBUG] train episode 390: reward = 106.00, steps = 106\n",
      "22:54:04 [DEBUG] train episode 391: reward = 29.00, steps = 29\n",
      "22:54:04 [DEBUG] train episode 392: reward = 91.00, steps = 91\n",
      "22:54:04 [DEBUG] train episode 393: reward = 155.00, steps = 155\n",
      "22:54:04 [DEBUG] train episode 394: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 395: reward = 84.00, steps = 84\n",
      "22:54:04 [DEBUG] train episode 396: reward = 82.00, steps = 82\n",
      "22:54:04 [DEBUG] train episode 397: reward = 22.00, steps = 22\n",
      "22:54:04 [DEBUG] train episode 398: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 399: reward = 29.00, steps = 29\n",
      "22:54:04 [DEBUG] train episode 400: reward = 25.00, steps = 25\n",
      "22:54:04 [DEBUG] train episode 401: reward = 88.00, steps = 88\n",
      "22:54:04 [DEBUG] train episode 402: reward = 26.00, steps = 26\n",
      "22:54:04 [DEBUG] train episode 403: reward = 32.00, steps = 32\n",
      "22:54:04 [DEBUG] train episode 404: reward = 77.00, steps = 77\n",
      "22:54:04 [DEBUG] train episode 405: reward = 163.00, steps = 163\n",
      "22:54:04 [DEBUG] train episode 406: reward = 117.00, steps = 117\n",
      "22:54:04 [DEBUG] train episode 407: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 408: reward = 53.00, steps = 53\n",
      "22:54:04 [DEBUG] train episode 409: reward = 58.00, steps = 58\n",
      "22:54:04 [DEBUG] train episode 410: reward = 31.00, steps = 31\n",
      "22:54:04 [DEBUG] train episode 411: reward = 57.00, steps = 57\n",
      "22:54:04 [DEBUG] train episode 412: reward = 132.00, steps = 132\n",
      "22:54:04 [DEBUG] train episode 413: reward = 44.00, steps = 44\n",
      "22:54:04 [DEBUG] train episode 414: reward = 87.00, steps = 87\n",
      "22:54:04 [DEBUG] train episode 415: reward = 41.00, steps = 41\n",
      "22:54:04 [DEBUG] train episode 416: reward = 73.00, steps = 73\n",
      "22:54:04 [DEBUG] train episode 417: reward = 137.00, steps = 137\n",
      "22:54:04 [DEBUG] train episode 418: reward = 97.00, steps = 97\n",
      "22:54:04 [DEBUG] train episode 419: reward = 116.00, steps = 116\n",
      "22:54:04 [DEBUG] train episode 420: reward = 27.00, steps = 27\n",
      "22:54:04 [DEBUG] train episode 421: reward = 34.00, steps = 34\n",
      "22:54:04 [DEBUG] train episode 422: reward = 108.00, steps = 108\n",
      "22:54:04 [DEBUG] train episode 423: reward = 200.00, steps = 200\n",
      "22:54:04 [DEBUG] train episode 424: reward = 34.00, steps = 34\n",
      "22:54:04 [DEBUG] train episode 425: reward = 34.00, steps = 34\n",
      "22:54:05 [DEBUG] train episode 426: reward = 115.00, steps = 115\n",
      "22:54:05 [DEBUG] train episode 427: reward = 55.00, steps = 55\n",
      "22:54:05 [DEBUG] train episode 428: reward = 119.00, steps = 119\n",
      "22:54:05 [DEBUG] train episode 429: reward = 74.00, steps = 74\n",
      "22:54:05 [DEBUG] train episode 430: reward = 42.00, steps = 42\n",
      "22:54:05 [DEBUG] train episode 431: reward = 102.00, steps = 102\n",
      "22:54:05 [DEBUG] train episode 432: reward = 119.00, steps = 119\n",
      "22:54:05 [DEBUG] train episode 433: reward = 32.00, steps = 32\n",
      "22:54:05 [DEBUG] train episode 434: reward = 104.00, steps = 104\n",
      "22:54:05 [DEBUG] train episode 435: reward = 95.00, steps = 95\n",
      "22:54:05 [DEBUG] train episode 436: reward = 73.00, steps = 73\n",
      "22:54:05 [DEBUG] train episode 437: reward = 24.00, steps = 24\n",
      "22:54:05 [DEBUG] train episode 438: reward = 81.00, steps = 81\n",
      "22:54:05 [DEBUG] train episode 439: reward = 130.00, steps = 130\n",
      "22:54:05 [DEBUG] train episode 440: reward = 20.00, steps = 20\n",
      "22:54:05 [DEBUG] train episode 441: reward = 134.00, steps = 134\n",
      "22:54:05 [DEBUG] train episode 442: reward = 161.00, steps = 161\n",
      "22:54:05 [DEBUG] train episode 443: reward = 200.00, steps = 200\n",
      "22:54:05 [DEBUG] train episode 444: reward = 57.00, steps = 57\n",
      "22:54:05 [DEBUG] train episode 445: reward = 86.00, steps = 86\n",
      "22:54:05 [DEBUG] train episode 446: reward = 31.00, steps = 31\n",
      "22:54:05 [DEBUG] train episode 447: reward = 114.00, steps = 114\n",
      "22:54:05 [DEBUG] train episode 448: reward = 12.00, steps = 12\n",
      "22:54:05 [DEBUG] train episode 449: reward = 43.00, steps = 43\n",
      "22:54:05 [DEBUG] train episode 450: reward = 114.00, steps = 114\n",
      "22:54:05 [DEBUG] train episode 451: reward = 20.00, steps = 20\n",
      "22:54:05 [DEBUG] train episode 452: reward = 72.00, steps = 72\n",
      "22:54:05 [DEBUG] train episode 453: reward = 61.00, steps = 61\n",
      "22:54:05 [DEBUG] train episode 454: reward = 116.00, steps = 116\n",
      "22:54:05 [DEBUG] train episode 455: reward = 101.00, steps = 101\n",
      "22:54:05 [DEBUG] train episode 456: reward = 22.00, steps = 22\n",
      "22:54:05 [DEBUG] train episode 457: reward = 75.00, steps = 75\n",
      "22:54:05 [DEBUG] train episode 458: reward = 28.00, steps = 28\n",
      "22:54:05 [DEBUG] train episode 459: reward = 131.00, steps = 131\n",
      "22:54:06 [DEBUG] train episode 460: reward = 87.00, steps = 87\n",
      "22:54:06 [DEBUG] train episode 461: reward = 163.00, steps = 163\n",
      "22:54:06 [DEBUG] train episode 462: reward = 135.00, steps = 135\n",
      "22:54:06 [DEBUG] train episode 463: reward = 191.00, steps = 191\n",
      "22:54:06 [DEBUG] train episode 464: reward = 22.00, steps = 22\n",
      "22:54:06 [DEBUG] train episode 465: reward = 142.00, steps = 142\n",
      "22:54:06 [DEBUG] train episode 466: reward = 46.00, steps = 46\n",
      "22:54:06 [DEBUG] train episode 467: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 468: reward = 18.00, steps = 18\n",
      "22:54:06 [DEBUG] train episode 469: reward = 125.00, steps = 125\n",
      "22:54:06 [DEBUG] train episode 470: reward = 101.00, steps = 101\n",
      "22:54:06 [DEBUG] train episode 471: reward = 146.00, steps = 146\n",
      "22:54:06 [DEBUG] train episode 472: reward = 37.00, steps = 37\n",
      "22:54:06 [DEBUG] train episode 473: reward = 104.00, steps = 104\n",
      "22:54:06 [DEBUG] train episode 474: reward = 75.00, steps = 75\n",
      "22:54:06 [DEBUG] train episode 475: reward = 44.00, steps = 44\n",
      "22:54:06 [DEBUG] train episode 476: reward = 123.00, steps = 123\n",
      "22:54:06 [DEBUG] train episode 477: reward = 34.00, steps = 34\n",
      "22:54:06 [DEBUG] train episode 478: reward = 13.00, steps = 13\n",
      "22:54:06 [DEBUG] train episode 479: reward = 110.00, steps = 110\n",
      "22:54:06 [DEBUG] train episode 480: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 481: reward = 86.00, steps = 86\n",
      "22:54:06 [DEBUG] train episode 482: reward = 91.00, steps = 91\n",
      "22:54:06 [DEBUG] train episode 483: reward = 35.00, steps = 35\n",
      "22:54:06 [DEBUG] train episode 484: reward = 70.00, steps = 70\n",
      "22:54:06 [DEBUG] train episode 485: reward = 103.00, steps = 103\n",
      "22:54:06 [DEBUG] train episode 486: reward = 32.00, steps = 32\n",
      "22:54:06 [DEBUG] train episode 487: reward = 44.00, steps = 44\n",
      "22:54:06 [DEBUG] train episode 488: reward = 77.00, steps = 77\n",
      "22:54:06 [DEBUG] train episode 489: reward = 14.00, steps = 14\n",
      "22:54:06 [DEBUG] train episode 490: reward = 69.00, steps = 69\n",
      "22:54:06 [DEBUG] train episode 491: reward = 200.00, steps = 200\n",
      "22:54:06 [DEBUG] train episode 492: reward = 61.00, steps = 61\n",
      "22:54:06 [DEBUG] train episode 493: reward = 38.00, steps = 38\n",
      "22:54:06 [DEBUG] train episode 494: reward = 27.00, steps = 27\n",
      "22:54:06 [DEBUG] train episode 495: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 496: reward = 61.00, steps = 61\n",
      "22:54:07 [DEBUG] train episode 497: reward = 169.00, steps = 169\n",
      "22:54:07 [DEBUG] train episode 498: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 499: reward = 40.00, steps = 40\n",
      "22:54:07 [DEBUG] train episode 500: reward = 130.00, steps = 130\n",
      "22:54:07 [DEBUG] train episode 501: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 502: reward = 75.00, steps = 75\n",
      "22:54:07 [DEBUG] train episode 503: reward = 53.00, steps = 53\n",
      "22:54:07 [DEBUG] train episode 504: reward = 26.00, steps = 26\n",
      "22:54:07 [DEBUG] train episode 505: reward = 133.00, steps = 133\n",
      "22:54:07 [DEBUG] train episode 506: reward = 122.00, steps = 122\n",
      "22:54:07 [DEBUG] train episode 507: reward = 49.00, steps = 49\n",
      "22:54:07 [DEBUG] train episode 508: reward = 138.00, steps = 138\n",
      "22:54:07 [DEBUG] train episode 509: reward = 73.00, steps = 73\n",
      "22:54:07 [DEBUG] train episode 510: reward = 115.00, steps = 115\n",
      "22:54:07 [DEBUG] train episode 511: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 512: reward = 121.00, steps = 121\n",
      "22:54:07 [DEBUG] train episode 513: reward = 132.00, steps = 132\n",
      "22:54:07 [DEBUG] train episode 514: reward = 65.00, steps = 65\n",
      "22:54:07 [DEBUG] train episode 515: reward = 16.00, steps = 16\n",
      "22:54:07 [DEBUG] train episode 516: reward = 108.00, steps = 108\n",
      "22:54:07 [DEBUG] train episode 517: reward = 40.00, steps = 40\n",
      "22:54:07 [DEBUG] train episode 518: reward = 34.00, steps = 34\n",
      "22:54:07 [DEBUG] train episode 519: reward = 70.00, steps = 70\n",
      "22:54:07 [DEBUG] train episode 520: reward = 101.00, steps = 101\n",
      "22:54:07 [DEBUG] train episode 521: reward = 17.00, steps = 17\n",
      "22:54:07 [DEBUG] train episode 522: reward = 65.00, steps = 65\n",
      "22:54:07 [DEBUG] train episode 523: reward = 106.00, steps = 106\n",
      "22:54:07 [DEBUG] train episode 524: reward = 21.00, steps = 21\n",
      "22:54:07 [DEBUG] train episode 525: reward = 101.00, steps = 101\n",
      "22:54:07 [DEBUG] train episode 526: reward = 103.00, steps = 103\n",
      "22:54:08 [DEBUG] train episode 527: reward = 183.00, steps = 183\n",
      "22:54:08 [DEBUG] train episode 528: reward = 58.00, steps = 58\n",
      "22:54:08 [DEBUG] train episode 529: reward = 34.00, steps = 34\n",
      "22:54:08 [DEBUG] train episode 530: reward = 142.00, steps = 142\n",
      "22:54:08 [DEBUG] train episode 531: reward = 172.00, steps = 172\n",
      "22:54:08 [DEBUG] train episode 532: reward = 110.00, steps = 110\n",
      "22:54:08 [DEBUG] train episode 533: reward = 60.00, steps = 60\n",
      "22:54:08 [DEBUG] train episode 534: reward = 22.00, steps = 22\n",
      "22:54:08 [DEBUG] train episode 535: reward = 118.00, steps = 118\n",
      "22:54:08 [DEBUG] train episode 536: reward = 162.00, steps = 162\n",
      "22:54:08 [DEBUG] train episode 537: reward = 126.00, steps = 126\n",
      "22:54:08 [DEBUG] train episode 538: reward = 47.00, steps = 47\n",
      "22:54:08 [DEBUG] train episode 539: reward = 10.00, steps = 10\n",
      "22:54:08 [DEBUG] train episode 540: reward = 103.00, steps = 103\n",
      "22:54:08 [DEBUG] train episode 541: reward = 59.00, steps = 59\n",
      "22:54:08 [DEBUG] train episode 542: reward = 48.00, steps = 48\n",
      "22:54:08 [DEBUG] train episode 543: reward = 66.00, steps = 66\n",
      "22:54:08 [DEBUG] train episode 544: reward = 149.00, steps = 149\n",
      "22:54:08 [DEBUG] train episode 545: reward = 80.00, steps = 80\n",
      "22:54:08 [DEBUG] train episode 546: reward = 134.00, steps = 134\n",
      "22:54:08 [DEBUG] train episode 547: reward = 111.00, steps = 111\n",
      "22:54:08 [DEBUG] train episode 548: reward = 40.00, steps = 40\n",
      "22:54:08 [DEBUG] train episode 549: reward = 141.00, steps = 141\n",
      "22:54:08 [DEBUG] train episode 550: reward = 111.00, steps = 111\n",
      "22:54:08 [DEBUG] train episode 551: reward = 57.00, steps = 57\n",
      "22:54:08 [DEBUG] train episode 552: reward = 52.00, steps = 52\n",
      "22:54:08 [DEBUG] train episode 553: reward = 90.00, steps = 90\n",
      "22:54:08 [DEBUG] train episode 554: reward = 200.00, steps = 200\n",
      "22:54:08 [DEBUG] train episode 555: reward = 58.00, steps = 58\n",
      "22:54:08 [DEBUG] train episode 556: reward = 60.00, steps = 60\n",
      "22:54:09 [DEBUG] train episode 557: reward = 178.00, steps = 178\n",
      "22:54:09 [DEBUG] train episode 558: reward = 119.00, steps = 119\n",
      "22:54:09 [DEBUG] train episode 559: reward = 17.00, steps = 17\n",
      "22:54:09 [DEBUG] train episode 560: reward = 73.00, steps = 73\n",
      "22:54:09 [DEBUG] train episode 561: reward = 44.00, steps = 44\n",
      "22:54:09 [DEBUG] train episode 562: reward = 177.00, steps = 177\n",
      "22:54:09 [DEBUG] train episode 563: reward = 148.00, steps = 148\n",
      "22:54:09 [DEBUG] train episode 564: reward = 160.00, steps = 160\n",
      "22:54:09 [DEBUG] train episode 565: reward = 50.00, steps = 50\n",
      "22:54:09 [DEBUG] train episode 566: reward = 30.00, steps = 30\n",
      "22:54:09 [DEBUG] train episode 567: reward = 138.00, steps = 138\n",
      "22:54:09 [DEBUG] train episode 568: reward = 44.00, steps = 44\n",
      "22:54:09 [DEBUG] train episode 569: reward = 69.00, steps = 69\n",
      "22:54:09 [DEBUG] train episode 570: reward = 166.00, steps = 166\n",
      "22:54:09 [DEBUG] train episode 571: reward = 70.00, steps = 70\n",
      "22:54:09 [DEBUG] train episode 572: reward = 111.00, steps = 111\n",
      "22:54:09 [DEBUG] train episode 573: reward = 91.00, steps = 91\n",
      "22:54:09 [DEBUG] train episode 574: reward = 133.00, steps = 133\n",
      "22:54:09 [DEBUG] train episode 575: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 576: reward = 19.00, steps = 19\n",
      "22:54:09 [DEBUG] train episode 577: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 578: reward = 139.00, steps = 139\n",
      "22:54:09 [DEBUG] train episode 579: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 580: reward = 14.00, steps = 14\n",
      "22:54:09 [DEBUG] train episode 581: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 582: reward = 58.00, steps = 58\n",
      "22:54:09 [DEBUG] train episode 583: reward = 140.00, steps = 140\n",
      "22:54:09 [DEBUG] train episode 584: reward = 24.00, steps = 24\n",
      "22:54:10 [DEBUG] train episode 585: reward = 145.00, steps = 145\n",
      "22:54:10 [DEBUG] train episode 586: reward = 26.00, steps = 26\n",
      "22:54:10 [DEBUG] train episode 587: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 588: reward = 69.00, steps = 69\n",
      "22:54:10 [DEBUG] train episode 589: reward = 153.00, steps = 153\n",
      "22:54:10 [DEBUG] train episode 590: reward = 48.00, steps = 48\n",
      "22:54:10 [DEBUG] train episode 591: reward = 194.00, steps = 194\n",
      "22:54:10 [DEBUG] train episode 592: reward = 87.00, steps = 87\n",
      "22:54:10 [DEBUG] train episode 593: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 594: reward = 164.00, steps = 164\n",
      "22:54:10 [DEBUG] train episode 595: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 596: reward = 67.00, steps = 67\n",
      "22:54:10 [DEBUG] train episode 597: reward = 34.00, steps = 34\n",
      "22:54:10 [DEBUG] train episode 598: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 599: reward = 50.00, steps = 50\n",
      "22:54:10 [DEBUG] train episode 600: reward = 120.00, steps = 120\n",
      "22:54:10 [DEBUG] train episode 601: reward = 17.00, steps = 17\n",
      "22:54:10 [DEBUG] train episode 602: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 603: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 604: reward = 24.00, steps = 24\n",
      "22:54:10 [DEBUG] train episode 605: reward = 61.00, steps = 61\n",
      "22:54:10 [DEBUG] train episode 606: reward = 21.00, steps = 21\n",
      "22:54:10 [DEBUG] train episode 607: reward = 184.00, steps = 184\n",
      "22:54:10 [DEBUG] train episode 608: reward = 160.00, steps = 160\n",
      "22:54:10 [DEBUG] train episode 609: reward = 72.00, steps = 72\n",
      "22:54:10 [DEBUG] train episode 610: reward = 123.00, steps = 123\n",
      "22:54:11 [DEBUG] train episode 611: reward = 106.00, steps = 106\n",
      "22:54:11 [DEBUG] train episode 612: reward = 13.00, steps = 13\n",
      "22:54:11 [DEBUG] train episode 613: reward = 174.00, steps = 174\n",
      "22:54:11 [DEBUG] train episode 614: reward = 142.00, steps = 142\n",
      "22:54:11 [DEBUG] train episode 615: reward = 100.00, steps = 100\n",
      "22:54:11 [DEBUG] train episode 616: reward = 138.00, steps = 138\n",
      "22:54:11 [DEBUG] train episode 617: reward = 57.00, steps = 57\n",
      "22:54:11 [DEBUG] train episode 618: reward = 67.00, steps = 67\n",
      "22:54:11 [DEBUG] train episode 619: reward = 149.00, steps = 149\n",
      "22:54:11 [DEBUG] train episode 620: reward = 100.00, steps = 100\n",
      "22:54:11 [DEBUG] train episode 621: reward = 88.00, steps = 88\n",
      "22:54:11 [DEBUG] train episode 622: reward = 60.00, steps = 60\n",
      "22:54:11 [DEBUG] train episode 623: reward = 57.00, steps = 57\n",
      "22:54:11 [DEBUG] train episode 624: reward = 58.00, steps = 58\n",
      "22:54:11 [DEBUG] train episode 625: reward = 155.00, steps = 155\n",
      "22:54:11 [DEBUG] train episode 626: reward = 191.00, steps = 191\n",
      "22:54:11 [DEBUG] train episode 627: reward = 28.00, steps = 28\n",
      "22:54:11 [DEBUG] train episode 628: reward = 192.00, steps = 192\n",
      "22:54:11 [DEBUG] train episode 629: reward = 53.00, steps = 53\n",
      "22:54:11 [DEBUG] train episode 630: reward = 167.00, steps = 167\n",
      "22:54:11 [DEBUG] train episode 631: reward = 16.00, steps = 16\n",
      "22:54:11 [DEBUG] train episode 632: reward = 72.00, steps = 72\n",
      "22:54:11 [DEBUG] train episode 633: reward = 145.00, steps = 145\n",
      "22:54:11 [DEBUG] train episode 634: reward = 51.00, steps = 51\n",
      "22:54:11 [DEBUG] train episode 635: reward = 92.00, steps = 92\n",
      "22:54:11 [DEBUG] train episode 636: reward = 168.00, steps = 168\n",
      "22:54:11 [DEBUG] train episode 637: reward = 98.00, steps = 98\n",
      "22:54:11 [DEBUG] train episode 638: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 639: reward = 49.00, steps = 49\n",
      "22:54:12 [DEBUG] train episode 640: reward = 125.00, steps = 125\n",
      "22:54:12 [DEBUG] train episode 641: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 642: reward = 175.00, steps = 175\n",
      "22:54:12 [DEBUG] train episode 643: reward = 73.00, steps = 73\n",
      "22:54:12 [DEBUG] train episode 644: reward = 64.00, steps = 64\n",
      "22:54:12 [DEBUG] train episode 645: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 646: reward = 54.00, steps = 54\n",
      "22:54:12 [DEBUG] train episode 647: reward = 144.00, steps = 144\n",
      "22:54:12 [DEBUG] train episode 648: reward = 166.00, steps = 166\n",
      "22:54:12 [DEBUG] train episode 649: reward = 89.00, steps = 89\n",
      "22:54:12 [DEBUG] train episode 650: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 651: reward = 164.00, steps = 164\n",
      "22:54:12 [DEBUG] train episode 652: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 653: reward = 49.00, steps = 49\n",
      "22:54:12 [DEBUG] train episode 654: reward = 178.00, steps = 178\n",
      "22:54:12 [DEBUG] train episode 655: reward = 16.00, steps = 16\n",
      "22:54:12 [DEBUG] train episode 656: reward = 146.00, steps = 146\n",
      "22:54:12 [DEBUG] train episode 657: reward = 164.00, steps = 164\n",
      "22:54:12 [DEBUG] train episode 658: reward = 180.00, steps = 180\n",
      "22:54:13 [DEBUG] train episode 659: reward = 143.00, steps = 143\n",
      "22:54:13 [DEBUG] train episode 660: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 661: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 662: reward = 148.00, steps = 148\n",
      "22:54:13 [DEBUG] train episode 663: reward = 68.00, steps = 68\n",
      "22:54:13 [DEBUG] train episode 664: reward = 63.00, steps = 63\n",
      "22:54:13 [DEBUG] train episode 665: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 666: reward = 52.00, steps = 52\n",
      "22:54:13 [DEBUG] train episode 667: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 668: reward = 32.00, steps = 32\n",
      "22:54:13 [DEBUG] train episode 669: reward = 58.00, steps = 58\n",
      "22:54:13 [DEBUG] train episode 670: reward = 100.00, steps = 100\n",
      "22:54:13 [DEBUG] train episode 671: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 672: reward = 173.00, steps = 173\n",
      "22:54:13 [DEBUG] train episode 673: reward = 89.00, steps = 89\n",
      "22:54:13 [DEBUG] train episode 674: reward = 69.00, steps = 69\n",
      "22:54:13 [DEBUG] train episode 675: reward = 44.00, steps = 44\n",
      "22:54:13 [DEBUG] train episode 676: reward = 20.00, steps = 20\n",
      "22:54:13 [DEBUG] train episode 677: reward = 17.00, steps = 17\n",
      "22:54:13 [DEBUG] train episode 678: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 679: reward = 71.00, steps = 71\n",
      "22:54:13 [DEBUG] train episode 680: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 681: reward = 198.00, steps = 198\n",
      "22:54:13 [DEBUG] train episode 682: reward = 82.00, steps = 82\n",
      "22:54:13 [DEBUG] train episode 683: reward = 122.00, steps = 122\n",
      "22:54:14 [DEBUG] train episode 684: reward = 160.00, steps = 160\n",
      "22:54:14 [DEBUG] train episode 685: reward = 147.00, steps = 147\n",
      "22:54:14 [DEBUG] train episode 686: reward = 102.00, steps = 102\n",
      "22:54:14 [DEBUG] train episode 687: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 688: reward = 106.00, steps = 106\n",
      "22:54:14 [DEBUG] train episode 689: reward = 58.00, steps = 58\n",
      "22:54:14 [DEBUG] train episode 690: reward = 175.00, steps = 175\n",
      "22:54:14 [DEBUG] train episode 691: reward = 189.00, steps = 189\n",
      "22:54:14 [DEBUG] train episode 692: reward = 30.00, steps = 30\n",
      "22:54:14 [DEBUG] train episode 693: reward = 80.00, steps = 80\n",
      "22:54:14 [DEBUG] train episode 694: reward = 36.00, steps = 36\n",
      "22:54:14 [DEBUG] train episode 695: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 696: reward = 62.00, steps = 62\n",
      "22:54:14 [DEBUG] train episode 697: reward = 75.00, steps = 75\n",
      "22:54:14 [DEBUG] train episode 698: reward = 168.00, steps = 168\n",
      "22:54:14 [DEBUG] train episode 699: reward = 111.00, steps = 111\n",
      "22:54:14 [DEBUG] train episode 700: reward = 154.00, steps = 154\n",
      "22:54:14 [DEBUG] train episode 701: reward = 115.00, steps = 115\n",
      "22:54:14 [DEBUG] train episode 702: reward = 180.00, steps = 180\n",
      "22:54:14 [DEBUG] train episode 703: reward = 110.00, steps = 110\n",
      "22:54:14 [DEBUG] train episode 704: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 705: reward = 34.00, steps = 34\n",
      "22:54:14 [DEBUG] train episode 706: reward = 76.00, steps = 76\n",
      "22:54:15 [DEBUG] train episode 707: reward = 172.00, steps = 172\n",
      "22:54:15 [DEBUG] train episode 708: reward = 55.00, steps = 55\n",
      "22:54:15 [DEBUG] train episode 709: reward = 97.00, steps = 97\n",
      "22:54:15 [DEBUG] train episode 710: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 711: reward = 101.00, steps = 101\n",
      "22:54:15 [DEBUG] train episode 712: reward = 139.00, steps = 139\n",
      "22:54:15 [DEBUG] train episode 713: reward = 65.00, steps = 65\n",
      "22:54:15 [DEBUG] train episode 714: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 715: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 716: reward = 171.00, steps = 171\n",
      "22:54:15 [DEBUG] train episode 717: reward = 139.00, steps = 139\n",
      "22:54:15 [DEBUG] train episode 718: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 719: reward = 51.00, steps = 51\n",
      "22:54:15 [DEBUG] train episode 720: reward = 159.00, steps = 159\n",
      "22:54:15 [DEBUG] train episode 721: reward = 105.00, steps = 105\n",
      "22:54:15 [DEBUG] train episode 722: reward = 69.00, steps = 69\n",
      "22:54:15 [DEBUG] train episode 723: reward = 51.00, steps = 51\n",
      "22:54:15 [DEBUG] train episode 724: reward = 104.00, steps = 104\n",
      "22:54:15 [DEBUG] train episode 725: reward = 35.00, steps = 35\n",
      "22:54:15 [DEBUG] train episode 726: reward = 49.00, steps = 49\n",
      "22:54:16 [DEBUG] train episode 727: reward = 176.00, steps = 176\n",
      "22:54:16 [DEBUG] train episode 728: reward = 97.00, steps = 97\n",
      "22:54:16 [DEBUG] train episode 729: reward = 15.00, steps = 15\n",
      "22:54:16 [DEBUG] train episode 730: reward = 113.00, steps = 113\n",
      "22:54:16 [DEBUG] train episode 731: reward = 200.00, steps = 200\n",
      "22:54:16 [DEBUG] train episode 732: reward = 70.00, steps = 70\n",
      "22:54:16 [DEBUG] train episode 733: reward = 145.00, steps = 145\n",
      "22:54:16 [DEBUG] train episode 734: reward = 71.00, steps = 71\n",
      "22:54:16 [DEBUG] train episode 735: reward = 168.00, steps = 168\n",
      "22:54:16 [DEBUG] train episode 736: reward = 178.00, steps = 178\n",
      "22:54:16 [DEBUG] train episode 737: reward = 108.00, steps = 108\n",
      "22:54:16 [DEBUG] train episode 738: reward = 137.00, steps = 137\n",
      "22:54:16 [DEBUG] train episode 739: reward = 161.00, steps = 161\n",
      "22:54:16 [DEBUG] train episode 740: reward = 160.00, steps = 160\n",
      "22:54:16 [DEBUG] train episode 741: reward = 35.00, steps = 35\n",
      "22:54:16 [DEBUG] train episode 742: reward = 200.00, steps = 200\n",
      "22:54:16 [DEBUG] train episode 743: reward = 65.00, steps = 65\n",
      "22:54:16 [DEBUG] train episode 744: reward = 156.00, steps = 156\n",
      "22:54:16 [DEBUG] train episode 745: reward = 13.00, steps = 13\n",
      "22:54:16 [DEBUG] train episode 746: reward = 93.00, steps = 93\n",
      "22:54:17 [DEBUG] train episode 747: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 748: reward = 148.00, steps = 148\n",
      "22:54:17 [DEBUG] train episode 749: reward = 172.00, steps = 172\n",
      "22:54:17 [DEBUG] train episode 750: reward = 96.00, steps = 96\n",
      "22:54:17 [DEBUG] train episode 751: reward = 154.00, steps = 154\n",
      "22:54:17 [DEBUG] train episode 752: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 753: reward = 37.00, steps = 37\n",
      "22:54:17 [DEBUG] train episode 754: reward = 127.00, steps = 127\n",
      "22:54:17 [DEBUG] train episode 755: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 756: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 757: reward = 33.00, steps = 33\n",
      "22:54:17 [DEBUG] train episode 758: reward = 108.00, steps = 108\n",
      "22:54:17 [DEBUG] train episode 759: reward = 72.00, steps = 72\n",
      "22:54:17 [DEBUG] train episode 760: reward = 156.00, steps = 156\n",
      "22:54:17 [DEBUG] train episode 761: reward = 149.00, steps = 149\n",
      "22:54:17 [DEBUG] train episode 762: reward = 23.00, steps = 23\n",
      "22:54:17 [DEBUG] train episode 763: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 764: reward = 155.00, steps = 155\n",
      "22:54:18 [DEBUG] train episode 765: reward = 110.00, steps = 110\n",
      "22:54:18 [DEBUG] train episode 766: reward = 181.00, steps = 181\n",
      "22:54:18 [DEBUG] train episode 767: reward = 76.00, steps = 76\n",
      "22:54:18 [DEBUG] train episode 768: reward = 61.00, steps = 61\n",
      "22:54:18 [DEBUG] train episode 769: reward = 124.00, steps = 124\n",
      "22:54:18 [DEBUG] train episode 770: reward = 34.00, steps = 34\n",
      "22:54:18 [DEBUG] train episode 771: reward = 166.00, steps = 166\n",
      "22:54:18 [DEBUG] train episode 772: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 773: reward = 180.00, steps = 180\n",
      "22:54:18 [DEBUG] train episode 774: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 775: reward = 118.00, steps = 118\n",
      "22:54:18 [DEBUG] train episode 776: reward = 125.00, steps = 125\n",
      "22:54:18 [DEBUG] train episode 777: reward = 55.00, steps = 55\n",
      "22:54:18 [DEBUG] train episode 778: reward = 21.00, steps = 21\n",
      "22:54:18 [DEBUG] train episode 779: reward = 139.00, steps = 139\n",
      "22:54:18 [DEBUG] train episode 780: reward = 43.00, steps = 43\n",
      "22:54:19 [DEBUG] train episode 781: reward = 122.00, steps = 122\n",
      "22:54:19 [DEBUG] train episode 782: reward = 163.00, steps = 163\n",
      "22:54:19 [DEBUG] train episode 783: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 784: reward = 146.00, steps = 146\n",
      "22:54:19 [DEBUG] train episode 785: reward = 70.00, steps = 70\n",
      "22:54:19 [DEBUG] train episode 786: reward = 66.00, steps = 66\n",
      "22:54:19 [DEBUG] train episode 787: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 788: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 789: reward = 143.00, steps = 143\n",
      "22:54:19 [DEBUG] train episode 790: reward = 35.00, steps = 35\n",
      "22:54:19 [DEBUG] train episode 791: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 792: reward = 88.00, steps = 88\n",
      "22:54:19 [DEBUG] train episode 793: reward = 198.00, steps = 198\n",
      "22:54:19 [DEBUG] train episode 794: reward = 117.00, steps = 117\n",
      "22:54:19 [DEBUG] train episode 795: reward = 199.00, steps = 199\n",
      "22:54:20 [DEBUG] train episode 796: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 797: reward = 182.00, steps = 182\n",
      "22:54:20 [DEBUG] train episode 798: reward = 56.00, steps = 56\n",
      "22:54:20 [DEBUG] train episode 799: reward = 19.00, steps = 19\n",
      "22:54:20 [DEBUG] train episode 800: reward = 67.00, steps = 67\n",
      "22:54:20 [DEBUG] train episode 801: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 802: reward = 107.00, steps = 107\n",
      "22:54:20 [DEBUG] train episode 803: reward = 187.00, steps = 187\n",
      "22:54:20 [DEBUG] train episode 804: reward = 195.00, steps = 195\n",
      "22:54:20 [DEBUG] train episode 805: reward = 64.00, steps = 64\n",
      "22:54:20 [DEBUG] train episode 806: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 807: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 808: reward = 47.00, steps = 47\n",
      "22:54:20 [DEBUG] train episode 809: reward = 128.00, steps = 128\n",
      "22:54:20 [DEBUG] train episode 810: reward = 63.00, steps = 63\n",
      "22:54:20 [DEBUG] train episode 811: reward = 126.00, steps = 126\n",
      "22:54:20 [DEBUG] train episode 812: reward = 125.00, steps = 125\n",
      "22:54:20 [DEBUG] train episode 813: reward = 190.00, steps = 190\n",
      "22:54:20 [DEBUG] train episode 814: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 815: reward = 50.00, steps = 50\n",
      "22:54:21 [DEBUG] train episode 816: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 817: reward = 137.00, steps = 137\n",
      "22:54:21 [DEBUG] train episode 818: reward = 73.00, steps = 73\n",
      "22:54:21 [DEBUG] train episode 819: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 820: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 821: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 822: reward = 116.00, steps = 116\n",
      "22:54:21 [DEBUG] train episode 823: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 824: reward = 54.00, steps = 54\n",
      "22:54:21 [DEBUG] train episode 825: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 826: reward = 79.00, steps = 79\n",
      "22:54:21 [DEBUG] train episode 827: reward = 71.00, steps = 71\n",
      "22:54:21 [DEBUG] train episode 828: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 829: reward = 200.00, steps = 200\n",
      "22:54:22 [DEBUG] train episode 830: reward = 194.00, steps = 194\n",
      "22:54:22 [DEBUG] train episode 831: reward = 134.00, steps = 134\n",
      "22:54:22 [DEBUG] train episode 832: reward = 15.00, steps = 15\n",
      "22:54:22 [DEBUG] train episode 833: reward = 113.00, steps = 113\n",
      "22:54:22 [DEBUG] train episode 834: reward = 131.00, steps = 131\n",
      "22:54:22 [DEBUG] train episode 835: reward = 73.00, steps = 73\n",
      "22:54:22 [DEBUG] train episode 836: reward = 114.00, steps = 114\n",
      "22:54:22 [DEBUG] train episode 837: reward = 162.00, steps = 162\n",
      "22:54:22 [DEBUG] train episode 838: reward = 45.00, steps = 45\n",
      "22:54:22 [DEBUG] train episode 839: reward = 63.00, steps = 63\n",
      "22:54:22 [DEBUG] train episode 840: reward = 61.00, steps = 61\n",
      "22:54:22 [DEBUG] train episode 841: reward = 173.00, steps = 173\n",
      "22:54:22 [DEBUG] train episode 842: reward = 200.00, steps = 200\n",
      "22:54:22 [DEBUG] train episode 843: reward = 164.00, steps = 164\n",
      "22:54:22 [DEBUG] train episode 844: reward = 116.00, steps = 116\n",
      "22:54:22 [DEBUG] train episode 845: reward = 124.00, steps = 124\n",
      "22:54:22 [DEBUG] train episode 846: reward = 86.00, steps = 86\n",
      "22:54:22 [DEBUG] train episode 847: reward = 31.00, steps = 31\n",
      "22:54:22 [DEBUG] train episode 848: reward = 170.00, steps = 170\n",
      "22:54:22 [DEBUG] train episode 849: reward = 27.00, steps = 27\n",
      "22:54:23 [DEBUG] train episode 850: reward = 63.00, steps = 63\n",
      "22:54:23 [DEBUG] train episode 851: reward = 20.00, steps = 20\n",
      "22:54:23 [DEBUG] train episode 852: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 853: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 854: reward = 22.00, steps = 22\n",
      "22:54:23 [DEBUG] train episode 855: reward = 128.00, steps = 128\n",
      "22:54:23 [DEBUG] train episode 856: reward = 186.00, steps = 186\n",
      "22:54:23 [DEBUG] train episode 857: reward = 108.00, steps = 108\n",
      "22:54:23 [DEBUG] train episode 858: reward = 103.00, steps = 103\n",
      "22:54:23 [DEBUG] train episode 859: reward = 40.00, steps = 40\n",
      "22:54:23 [DEBUG] train episode 860: reward = 42.00, steps = 42\n",
      "22:54:23 [DEBUG] train episode 861: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 862: reward = 150.00, steps = 150\n",
      "22:54:23 [DEBUG] train episode 863: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 864: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 865: reward = 123.00, steps = 123\n",
      "22:54:23 [DEBUG] train episode 866: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 867: reward = 163.00, steps = 163\n",
      "22:54:24 [DEBUG] train episode 868: reward = 122.00, steps = 122\n",
      "22:54:24 [DEBUG] train episode 869: reward = 197.00, steps = 197\n",
      "22:54:24 [DEBUG] train episode 870: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 871: reward = 158.00, steps = 158\n",
      "22:54:24 [DEBUG] train episode 872: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 873: reward = 114.00, steps = 114\n",
      "22:54:24 [DEBUG] train episode 874: reward = 191.00, steps = 191\n",
      "22:54:24 [DEBUG] train episode 875: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 876: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 877: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 878: reward = 182.00, steps = 182\n",
      "22:54:24 [DEBUG] train episode 879: reward = 130.00, steps = 130\n",
      "22:54:25 [DEBUG] train episode 880: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 881: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 882: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 883: reward = 30.00, steps = 30\n",
      "22:54:25 [DEBUG] train episode 884: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 885: reward = 102.00, steps = 102\n",
      "22:54:25 [DEBUG] train episode 886: reward = 42.00, steps = 42\n",
      "22:54:25 [DEBUG] train episode 887: reward = 99.00, steps = 99\n",
      "22:54:25 [DEBUG] train episode 888: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 889: reward = 135.00, steps = 135\n",
      "22:54:25 [DEBUG] train episode 890: reward = 18.00, steps = 18\n",
      "22:54:25 [DEBUG] train episode 891: reward = 173.00, steps = 173\n",
      "22:54:25 [DEBUG] train episode 892: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 893: reward = 32.00, steps = 32\n",
      "22:54:25 [DEBUG] train episode 894: reward = 100.00, steps = 100\n",
      "22:54:25 [DEBUG] train episode 895: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 896: reward = 25.00, steps = 25\n",
      "22:54:25 [DEBUG] train episode 897: reward = 188.00, steps = 188\n",
      "22:54:26 [DEBUG] train episode 898: reward = 143.00, steps = 143\n",
      "22:54:26 [DEBUG] train episode 899: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 900: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 901: reward = 34.00, steps = 34\n",
      "22:54:26 [DEBUG] train episode 902: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 903: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 904: reward = 56.00, steps = 56\n",
      "22:54:26 [DEBUG] train episode 905: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 906: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 907: reward = 48.00, steps = 48\n",
      "22:54:26 [DEBUG] train episode 908: reward = 161.00, steps = 161\n",
      "22:54:26 [DEBUG] train episode 909: reward = 153.00, steps = 153\n",
      "22:54:26 [DEBUG] train episode 910: reward = 172.00, steps = 172\n",
      "22:54:26 [DEBUG] train episode 911: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 912: reward = 146.00, steps = 146\n",
      "22:54:26 [DEBUG] train episode 913: reward = 71.00, steps = 71\n",
      "22:54:26 [DEBUG] train episode 914: reward = 133.00, steps = 133\n",
      "22:54:26 [DEBUG] train episode 915: reward = 65.00, steps = 65\n",
      "22:54:26 [DEBUG] train episode 916: reward = 73.00, steps = 73\n",
      "22:54:27 [DEBUG] train episode 917: reward = 31.00, steps = 31\n",
      "22:54:27 [DEBUG] train episode 918: reward = 75.00, steps = 75\n",
      "22:54:27 [DEBUG] train episode 919: reward = 26.00, steps = 26\n",
      "22:54:27 [DEBUG] train episode 920: reward = 79.00, steps = 79\n",
      "22:54:27 [DEBUG] train episode 921: reward = 158.00, steps = 158\n",
      "22:54:27 [DEBUG] train episode 922: reward = 74.00, steps = 74\n",
      "22:54:27 [DEBUG] train episode 923: reward = 83.00, steps = 83\n",
      "22:54:27 [DEBUG] train episode 924: reward = 81.00, steps = 81\n",
      "22:54:27 [DEBUG] train episode 925: reward = 25.00, steps = 25\n",
      "22:54:27 [DEBUG] train episode 926: reward = 20.00, steps = 20\n",
      "22:54:27 [DEBUG] train episode 927: reward = 177.00, steps = 177\n",
      "22:54:27 [DEBUG] train episode 928: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 929: reward = 71.00, steps = 71\n",
      "22:54:27 [DEBUG] train episode 930: reward = 13.00, steps = 13\n",
      "22:54:27 [DEBUG] train episode 931: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 932: reward = 61.00, steps = 61\n",
      "22:54:27 [DEBUG] train episode 933: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 934: reward = 54.00, steps = 54\n",
      "22:54:27 [DEBUG] train episode 935: reward = 65.00, steps = 65\n",
      "22:54:27 [DEBUG] train episode 936: reward = 198.00, steps = 198\n",
      "22:54:27 [DEBUG] train episode 937: reward = 37.00, steps = 37\n",
      "22:54:27 [DEBUG] train episode 938: reward = 119.00, steps = 119\n",
      "22:54:27 [DEBUG] train episode 939: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 940: reward = 185.00, steps = 185\n",
      "22:54:28 [DEBUG] train episode 941: reward = 128.00, steps = 128\n",
      "22:54:28 [DEBUG] train episode 942: reward = 40.00, steps = 40\n",
      "22:54:28 [DEBUG] train episode 943: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 944: reward = 59.00, steps = 59\n",
      "22:54:28 [DEBUG] train episode 945: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 946: reward = 140.00, steps = 140\n",
      "22:54:28 [DEBUG] train episode 947: reward = 25.00, steps = 25\n",
      "22:54:28 [DEBUG] train episode 948: reward = 100.00, steps = 100\n",
      "22:54:28 [DEBUG] train episode 949: reward = 22.00, steps = 22\n",
      "22:54:28 [DEBUG] train episode 950: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 951: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 952: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 953: reward = 166.00, steps = 166\n",
      "22:54:28 [DEBUG] train episode 954: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 955: reward = 169.00, steps = 169\n",
      "22:54:28 [DEBUG] train episode 956: reward = 114.00, steps = 114\n",
      "22:54:28 [DEBUG] train episode 957: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 958: reward = 103.00, steps = 103\n",
      "22:54:29 [DEBUG] train episode 959: reward = 176.00, steps = 176\n",
      "22:54:29 [DEBUG] train episode 960: reward = 155.00, steps = 155\n",
      "22:54:29 [DEBUG] train episode 961: reward = 126.00, steps = 126\n",
      "22:54:29 [DEBUG] train episode 962: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 963: reward = 120.00, steps = 120\n",
      "22:54:29 [DEBUG] train episode 964: reward = 159.00, steps = 159\n",
      "22:54:29 [DEBUG] train episode 965: reward = 156.00, steps = 156\n",
      "22:54:29 [DEBUG] train episode 966: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 967: reward = 91.00, steps = 91\n",
      "22:54:29 [DEBUG] train episode 968: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 969: reward = 162.00, steps = 162\n",
      "22:54:29 [DEBUG] train episode 970: reward = 189.00, steps = 189\n",
      "22:54:29 [DEBUG] train episode 971: reward = 64.00, steps = 64\n",
      "22:54:29 [DEBUG] train episode 972: reward = 123.00, steps = 123\n",
      "22:54:29 [DEBUG] train episode 973: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 974: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 975: reward = 58.00, steps = 58\n",
      "22:54:29 [DEBUG] train episode 976: reward = 133.00, steps = 133\n",
      "22:54:29 [DEBUG] train episode 977: reward = 100.00, steps = 100\n",
      "22:54:29 [DEBUG] train episode 978: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 979: reward = 106.00, steps = 106\n",
      "22:54:30 [DEBUG] train episode 980: reward = 167.00, steps = 167\n",
      "22:54:30 [DEBUG] train episode 981: reward = 116.00, steps = 116\n",
      "22:54:30 [DEBUG] train episode 982: reward = 158.00, steps = 158\n",
      "22:54:30 [DEBUG] train episode 983: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 984: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 985: reward = 112.00, steps = 112\n",
      "22:54:30 [DEBUG] train episode 986: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 987: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 988: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 989: reward = 148.00, steps = 148\n",
      "22:54:30 [DEBUG] train episode 990: reward = 133.00, steps = 133\n",
      "22:54:30 [DEBUG] train episode 991: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 992: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 993: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 994: reward = 60.00, steps = 60\n",
      "22:54:30 [DEBUG] train episode 995: reward = 188.00, steps = 188\n",
      "22:54:30 [DEBUG] train episode 996: reward = 120.00, steps = 120\n",
      "22:54:30 [DEBUG] train episode 997: reward = 70.00, steps = 70\n",
      "22:54:30 [DEBUG] train episode 998: reward = 70.00, steps = 70\n",
      "22:54:31 [DEBUG] train episode 999: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1000: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1001: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1002: reward = 181.00, steps = 181\n",
      "22:54:31 [DEBUG] train episode 1003: reward = 34.00, steps = 34\n",
      "22:54:31 [DEBUG] train episode 1004: reward = 177.00, steps = 177\n",
      "22:54:31 [DEBUG] train episode 1005: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1006: reward = 97.00, steps = 97\n",
      "22:54:31 [DEBUG] train episode 1007: reward = 155.00, steps = 155\n",
      "22:54:31 [DEBUG] train episode 1008: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1009: reward = 163.00, steps = 163\n",
      "22:54:31 [DEBUG] train episode 1010: reward = 56.00, steps = 56\n",
      "22:54:31 [DEBUG] train episode 1011: reward = 168.00, steps = 168\n",
      "22:54:31 [DEBUG] train episode 1012: reward = 78.00, steps = 78\n",
      "22:54:31 [DEBUG] train episode 1013: reward = 149.00, steps = 149\n",
      "22:54:31 [DEBUG] train episode 1014: reward = 24.00, steps = 24\n",
      "22:54:31 [DEBUG] train episode 1015: reward = 48.00, steps = 48\n",
      "22:54:31 [DEBUG] train episode 1016: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1017: reward = 189.00, steps = 189\n",
      "22:54:31 [DEBUG] train episode 1018: reward = 151.00, steps = 151\n",
      "22:54:31 [DEBUG] train episode 1019: reward = 84.00, steps = 84\n",
      "22:54:31 [DEBUG] train episode 1020: reward = 114.00, steps = 114\n",
      "22:54:32 [DEBUG] train episode 1021: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1022: reward = 194.00, steps = 194\n",
      "22:54:32 [DEBUG] train episode 1023: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1024: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1025: reward = 55.00, steps = 55\n",
      "22:54:32 [DEBUG] train episode 1026: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1027: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1028: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1029: reward = 130.00, steps = 130\n",
      "22:54:32 [DEBUG] train episode 1030: reward = 78.00, steps = 78\n",
      "22:54:32 [DEBUG] train episode 1031: reward = 50.00, steps = 50\n",
      "22:54:32 [DEBUG] train episode 1032: reward = 129.00, steps = 129\n",
      "22:54:32 [DEBUG] train episode 1033: reward = 190.00, steps = 190\n",
      "22:54:32 [DEBUG] train episode 1034: reward = 164.00, steps = 164\n",
      "22:54:32 [DEBUG] train episode 1035: reward = 190.00, steps = 190\n",
      "22:54:32 [DEBUG] train episode 1036: reward = 66.00, steps = 66\n",
      "22:54:32 [DEBUG] train episode 1037: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1038: reward = 15.00, steps = 15\n",
      "22:54:32 [DEBUG] train episode 1039: reward = 29.00, steps = 29\n",
      "22:54:32 [DEBUG] train episode 1040: reward = 149.00, steps = 149\n",
      "22:54:32 [DEBUG] train episode 1041: reward = 146.00, steps = 146\n",
      "22:54:32 [DEBUG] train episode 1042: reward = 75.00, steps = 75\n",
      "22:54:33 [DEBUG] train episode 1043: reward = 145.00, steps = 145\n",
      "22:54:33 [DEBUG] train episode 1044: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1045: reward = 93.00, steps = 93\n",
      "22:54:33 [DEBUG] train episode 1046: reward = 160.00, steps = 160\n",
      "22:54:33 [DEBUG] train episode 1047: reward = 189.00, steps = 189\n",
      "22:54:33 [DEBUG] train episode 1048: reward = 36.00, steps = 36\n",
      "22:54:33 [DEBUG] train episode 1049: reward = 156.00, steps = 156\n",
      "22:54:33 [DEBUG] train episode 1050: reward = 127.00, steps = 127\n",
      "22:54:33 [DEBUG] train episode 1051: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1052: reward = 80.00, steps = 80\n",
      "22:54:33 [DEBUG] train episode 1053: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1054: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1055: reward = 65.00, steps = 65\n",
      "22:54:33 [DEBUG] train episode 1056: reward = 169.00, steps = 169\n",
      "22:54:33 [DEBUG] train episode 1057: reward = 63.00, steps = 63\n",
      "22:54:33 [DEBUG] train episode 1058: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1059: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1060: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1061: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1062: reward = 129.00, steps = 129\n",
      "22:54:34 [DEBUG] train episode 1063: reward = 112.00, steps = 112\n",
      "22:54:34 [DEBUG] train episode 1064: reward = 147.00, steps = 147\n",
      "22:54:34 [DEBUG] train episode 1065: reward = 178.00, steps = 178\n",
      "22:54:34 [DEBUG] train episode 1066: reward = 70.00, steps = 70\n",
      "22:54:34 [DEBUG] train episode 1067: reward = 160.00, steps = 160\n",
      "22:54:34 [DEBUG] train episode 1068: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1069: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1070: reward = 47.00, steps = 47\n",
      "22:54:34 [DEBUG] train episode 1071: reward = 125.00, steps = 125\n",
      "22:54:34 [DEBUG] train episode 1072: reward = 88.00, steps = 88\n",
      "22:54:34 [DEBUG] train episode 1073: reward = 153.00, steps = 153\n",
      "22:54:34 [DEBUG] train episode 1074: reward = 143.00, steps = 143\n",
      "22:54:34 [DEBUG] train episode 1075: reward = 153.00, steps = 153\n",
      "22:54:34 [DEBUG] train episode 1076: reward = 143.00, steps = 143\n",
      "22:54:35 [DEBUG] train episode 1077: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1078: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1079: reward = 66.00, steps = 66\n",
      "22:54:35 [DEBUG] train episode 1080: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1081: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1082: reward = 52.00, steps = 52\n",
      "22:54:35 [DEBUG] train episode 1083: reward = 192.00, steps = 192\n",
      "22:54:35 [DEBUG] train episode 1084: reward = 115.00, steps = 115\n",
      "22:54:35 [DEBUG] train episode 1085: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1086: reward = 75.00, steps = 75\n",
      "22:54:35 [DEBUG] train episode 1087: reward = 164.00, steps = 164\n",
      "22:54:35 [DEBUG] train episode 1088: reward = 135.00, steps = 135\n",
      "22:54:35 [DEBUG] train episode 1089: reward = 42.00, steps = 42\n",
      "22:54:35 [DEBUG] train episode 1090: reward = 162.00, steps = 162\n",
      "22:54:35 [DEBUG] train episode 1091: reward = 139.00, steps = 139\n",
      "22:54:35 [DEBUG] train episode 1092: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1093: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1094: reward = 169.00, steps = 169\n",
      "22:54:36 [DEBUG] train episode 1095: reward = 165.00, steps = 165\n",
      "22:54:36 [DEBUG] train episode 1096: reward = 182.00, steps = 182\n",
      "22:54:36 [DEBUG] train episode 1097: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1098: reward = 94.00, steps = 94\n",
      "22:54:36 [DEBUG] train episode 1099: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1100: reward = 96.00, steps = 96\n",
      "22:54:36 [DEBUG] train episode 1101: reward = 16.00, steps = 16\n",
      "22:54:36 [DEBUG] train episode 1102: reward = 144.00, steps = 144\n",
      "22:54:36 [DEBUG] train episode 1103: reward = 59.00, steps = 59\n",
      "22:54:36 [DEBUG] train episode 1104: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1105: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1106: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1107: reward = 163.00, steps = 163\n",
      "22:54:36 [DEBUG] train episode 1108: reward = 169.00, steps = 169\n",
      "22:54:36 [DEBUG] train episode 1109: reward = 119.00, steps = 119\n",
      "22:54:37 [DEBUG] train episode 1110: reward = 171.00, steps = 171\n",
      "22:54:37 [DEBUG] train episode 1111: reward = 166.00, steps = 166\n",
      "22:54:37 [DEBUG] train episode 1112: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1113: reward = 168.00, steps = 168\n",
      "22:54:37 [DEBUG] train episode 1114: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1115: reward = 75.00, steps = 75\n",
      "22:54:37 [DEBUG] train episode 1116: reward = 35.00, steps = 35\n",
      "22:54:37 [DEBUG] train episode 1117: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1118: reward = 80.00, steps = 80\n",
      "22:54:37 [DEBUG] train episode 1119: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1120: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1121: reward = 71.00, steps = 71\n",
      "22:54:37 [DEBUG] train episode 1122: reward = 20.00, steps = 20\n",
      "22:54:37 [DEBUG] train episode 1123: reward = 131.00, steps = 131\n",
      "22:54:37 [DEBUG] train episode 1124: reward = 159.00, steps = 159\n",
      "22:54:37 [DEBUG] train episode 1125: reward = 68.00, steps = 68\n",
      "22:54:37 [DEBUG] train episode 1126: reward = 47.00, steps = 47\n",
      "22:54:37 [DEBUG] train episode 1127: reward = 149.00, steps = 149\n",
      "22:54:37 [DEBUG] train episode 1128: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1129: reward = 69.00, steps = 69\n",
      "22:54:38 [DEBUG] train episode 1130: reward = 100.00, steps = 100\n",
      "22:54:38 [DEBUG] train episode 1131: reward = 151.00, steps = 151\n",
      "22:54:38 [DEBUG] train episode 1132: reward = 47.00, steps = 47\n",
      "22:54:38 [DEBUG] train episode 1133: reward = 117.00, steps = 117\n",
      "22:54:38 [DEBUG] train episode 1134: reward = 140.00, steps = 140\n",
      "22:54:38 [DEBUG] train episode 1135: reward = 121.00, steps = 121\n",
      "22:54:38 [DEBUG] train episode 1136: reward = 133.00, steps = 133\n",
      "22:54:38 [DEBUG] train episode 1137: reward = 70.00, steps = 70\n",
      "22:54:38 [DEBUG] train episode 1138: reward = 108.00, steps = 108\n",
      "22:54:38 [DEBUG] train episode 1139: reward = 73.00, steps = 73\n",
      "22:54:38 [DEBUG] train episode 1140: reward = 105.00, steps = 105\n",
      "22:54:38 [DEBUG] train episode 1141: reward = 91.00, steps = 91\n",
      "22:54:38 [DEBUG] train episode 1142: reward = 95.00, steps = 95\n",
      "22:54:38 [DEBUG] train episode 1143: reward = 175.00, steps = 175\n",
      "22:54:38 [DEBUG] train episode 1144: reward = 200.00, steps = 200\n",
      "22:54:38 [DEBUG] train episode 1145: reward = 131.00, steps = 131\n",
      "22:54:38 [DEBUG] train episode 1146: reward = 120.00, steps = 120\n",
      "22:54:38 [DEBUG] train episode 1147: reward = 132.00, steps = 132\n",
      "22:54:39 [DEBUG] train episode 1148: reward = 85.00, steps = 85\n",
      "22:54:39 [DEBUG] train episode 1149: reward = 189.00, steps = 189\n",
      "22:54:39 [DEBUG] train episode 1150: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1151: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1152: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1153: reward = 180.00, steps = 180\n",
      "22:54:39 [DEBUG] train episode 1154: reward = 123.00, steps = 123\n",
      "22:54:39 [DEBUG] train episode 1155: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1156: reward = 141.00, steps = 141\n",
      "22:54:39 [DEBUG] train episode 1157: reward = 135.00, steps = 135\n",
      "22:54:39 [DEBUG] train episode 1158: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1159: reward = 152.00, steps = 152\n",
      "22:54:39 [DEBUG] train episode 1160: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1161: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1162: reward = 109.00, steps = 109\n",
      "22:54:39 [DEBUG] train episode 1163: reward = 120.00, steps = 120\n",
      "22:54:40 [DEBUG] train episode 1164: reward = 129.00, steps = 129\n",
      "22:54:40 [DEBUG] train episode 1165: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1166: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1167: reward = 52.00, steps = 52\n",
      "22:54:40 [DEBUG] train episode 1168: reward = 179.00, steps = 179\n",
      "22:54:40 [DEBUG] train episode 1169: reward = 82.00, steps = 82\n",
      "22:54:40 [DEBUG] train episode 1170: reward = 148.00, steps = 148\n",
      "22:54:40 [DEBUG] train episode 1171: reward = 172.00, steps = 172\n",
      "22:54:40 [DEBUG] train episode 1172: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1173: reward = 195.00, steps = 195\n",
      "22:54:40 [DEBUG] train episode 1174: reward = 156.00, steps = 156\n",
      "22:54:40 [DEBUG] train episode 1175: reward = 143.00, steps = 143\n",
      "22:54:40 [DEBUG] train episode 1176: reward = 97.00, steps = 97\n",
      "22:54:40 [DEBUG] train episode 1177: reward = 170.00, steps = 170\n",
      "22:54:40 [DEBUG] train episode 1178: reward = 135.00, steps = 135\n",
      "22:54:40 [DEBUG] train episode 1179: reward = 89.00, steps = 89\n",
      "22:54:40 [DEBUG] train episode 1180: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1181: reward = 44.00, steps = 44\n",
      "22:54:40 [DEBUG] train episode 1182: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1183: reward = 37.00, steps = 37\n",
      "22:54:41 [DEBUG] train episode 1184: reward = 145.00, steps = 145\n",
      "22:54:41 [DEBUG] train episode 1185: reward = 183.00, steps = 183\n",
      "22:54:41 [DEBUG] train episode 1186: reward = 105.00, steps = 105\n",
      "22:54:41 [DEBUG] train episode 1187: reward = 186.00, steps = 186\n",
      "22:54:41 [DEBUG] train episode 1188: reward = 181.00, steps = 181\n",
      "22:54:41 [DEBUG] train episode 1189: reward = 121.00, steps = 121\n",
      "22:54:41 [DEBUG] train episode 1190: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1191: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1192: reward = 108.00, steps = 108\n",
      "22:54:41 [DEBUG] train episode 1193: reward = 106.00, steps = 106\n",
      "22:54:41 [DEBUG] train episode 1194: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1195: reward = 134.00, steps = 134\n",
      "22:54:41 [DEBUG] train episode 1196: reward = 48.00, steps = 48\n",
      "22:54:41 [DEBUG] train episode 1197: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1198: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1199: reward = 200.00, steps = 200\n",
      "22:54:42 [DEBUG] train episode 1200: reward = 138.00, steps = 138\n",
      "22:54:42 [DEBUG] train episode 1201: reward = 165.00, steps = 165\n",
      "22:54:42 [DEBUG] train episode 1202: reward = 167.00, steps = 167\n",
      "22:54:42 [DEBUG] train episode 1203: reward = 151.00, steps = 151\n",
      "22:54:42 [DEBUG] train episode 1204: reward = 81.00, steps = 81\n",
      "22:54:42 [DEBUG] train episode 1205: reward = 171.00, steps = 171\n",
      "22:54:42 [DEBUG] train episode 1206: reward = 183.00, steps = 183\n",
      "22:54:42 [DEBUG] train episode 1207: reward = 165.00, steps = 165\n",
      "22:54:42 [DEBUG] train episode 1208: reward = 162.00, steps = 162\n",
      "22:54:42 [DEBUG] train episode 1209: reward = 41.00, steps = 41\n",
      "22:54:42 [DEBUG] train episode 1210: reward = 198.00, steps = 198\n",
      "22:54:42 [DEBUG] train episode 1211: reward = 181.00, steps = 181\n",
      "22:54:42 [DEBUG] train episode 1212: reward = 171.00, steps = 171\n",
      "22:54:42 [DEBUG] train episode 1213: reward = 47.00, steps = 47\n",
      "22:54:42 [DEBUG] train episode 1214: reward = 200.00, steps = 200\n",
      "22:54:42 [DEBUG] train episode 1215: reward = 109.00, steps = 109\n",
      "22:54:43 [DEBUG] train episode 1216: reward = 155.00, steps = 155\n",
      "22:54:43 [DEBUG] train episode 1217: reward = 196.00, steps = 196\n",
      "22:54:43 [DEBUG] train episode 1218: reward = 186.00, steps = 186\n",
      "22:54:43 [DEBUG] train episode 1219: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1220: reward = 34.00, steps = 34\n",
      "22:54:43 [DEBUG] train episode 1221: reward = 144.00, steps = 144\n",
      "22:54:43 [DEBUG] train episode 1222: reward = 151.00, steps = 151\n",
      "22:54:43 [DEBUG] train episode 1223: reward = 194.00, steps = 194\n",
      "22:54:43 [DEBUG] train episode 1224: reward = 51.00, steps = 51\n",
      "22:54:43 [DEBUG] train episode 1225: reward = 102.00, steps = 102\n",
      "22:54:43 [DEBUG] train episode 1226: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1227: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1228: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1229: reward = 129.00, steps = 129\n",
      "22:54:43 [DEBUG] train episode 1230: reward = 35.00, steps = 35\n",
      "22:54:44 [DEBUG] train episode 1231: reward = 111.00, steps = 111\n",
      "22:54:44 [DEBUG] train episode 1232: reward = 158.00, steps = 158\n",
      "22:54:44 [DEBUG] train episode 1233: reward = 74.00, steps = 74\n",
      "22:54:44 [DEBUG] train episode 1234: reward = 139.00, steps = 139\n",
      "22:54:44 [DEBUG] train episode 1235: reward = 180.00, steps = 180\n",
      "22:54:44 [DEBUG] train episode 1236: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1237: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1238: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1239: reward = 75.00, steps = 75\n",
      "22:54:44 [DEBUG] train episode 1240: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1241: reward = 144.00, steps = 144\n",
      "22:54:44 [DEBUG] train episode 1242: reward = 117.00, steps = 117\n",
      "22:54:44 [DEBUG] train episode 1243: reward = 46.00, steps = 46\n",
      "22:54:44 [DEBUG] train episode 1244: reward = 107.00, steps = 107\n",
      "22:54:45 [DEBUG] train episode 1245: reward = 175.00, steps = 175\n",
      "22:54:45 [DEBUG] train episode 1246: reward = 100.00, steps = 100\n",
      "22:54:45 [DEBUG] train episode 1247: reward = 45.00, steps = 45\n",
      "22:54:45 [DEBUG] train episode 1248: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1249: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1250: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1251: reward = 169.00, steps = 169\n",
      "22:54:45 [DEBUG] train episode 1252: reward = 152.00, steps = 152\n",
      "22:54:45 [DEBUG] train episode 1253: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1254: reward = 155.00, steps = 155\n",
      "22:54:45 [DEBUG] train episode 1255: reward = 199.00, steps = 199\n",
      "22:54:45 [DEBUG] train episode 1256: reward = 124.00, steps = 124\n",
      "22:54:45 [DEBUG] train episode 1257: reward = 178.00, steps = 178\n",
      "22:54:45 [DEBUG] train episode 1258: reward = 174.00, steps = 174\n",
      "22:54:45 [DEBUG] train episode 1259: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1260: reward = 196.00, steps = 196\n",
      "22:54:46 [DEBUG] train episode 1261: reward = 177.00, steps = 177\n",
      "22:54:46 [DEBUG] train episode 1262: reward = 120.00, steps = 120\n",
      "22:54:46 [DEBUG] train episode 1263: reward = 182.00, steps = 182\n",
      "22:54:46 [DEBUG] train episode 1264: reward = 198.00, steps = 198\n",
      "22:54:46 [DEBUG] train episode 1265: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1266: reward = 44.00, steps = 44\n",
      "22:54:46 [DEBUG] train episode 1267: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1268: reward = 151.00, steps = 151\n",
      "22:54:46 [DEBUG] train episode 1269: reward = 139.00, steps = 139\n",
      "22:54:46 [DEBUG] train episode 1270: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1271: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1272: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1273: reward = 113.00, steps = 113\n",
      "22:54:47 [DEBUG] train episode 1274: reward = 156.00, steps = 156\n",
      "22:54:47 [DEBUG] train episode 1275: reward = 150.00, steps = 150\n",
      "22:54:47 [DEBUG] train episode 1276: reward = 159.00, steps = 159\n",
      "22:54:47 [DEBUG] train episode 1277: reward = 131.00, steps = 131\n",
      "22:54:47 [DEBUG] train episode 1278: reward = 110.00, steps = 110\n",
      "22:54:47 [DEBUG] train episode 1279: reward = 177.00, steps = 177\n",
      "22:54:47 [DEBUG] train episode 1280: reward = 191.00, steps = 191\n",
      "22:54:47 [DEBUG] train episode 1281: reward = 199.00, steps = 199\n",
      "22:54:47 [DEBUG] train episode 1282: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1283: reward = 174.00, steps = 174\n",
      "22:54:47 [DEBUG] train episode 1284: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1285: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1286: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1287: reward = 28.00, steps = 28\n",
      "22:54:48 [DEBUG] train episode 1288: reward = 141.00, steps = 141\n",
      "22:54:48 [DEBUG] train episode 1289: reward = 135.00, steps = 135\n",
      "22:54:48 [DEBUG] train episode 1290: reward = 72.00, steps = 72\n",
      "22:54:48 [DEBUG] train episode 1291: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1292: reward = 153.00, steps = 153\n",
      "22:54:48 [DEBUG] train episode 1293: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1294: reward = 119.00, steps = 119\n",
      "22:54:48 [DEBUG] train episode 1295: reward = 184.00, steps = 184\n",
      "22:54:48 [DEBUG] train episode 1296: reward = 160.00, steps = 160\n",
      "22:54:48 [DEBUG] train episode 1297: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1298: reward = 82.00, steps = 82\n",
      "22:54:48 [DEBUG] train episode 1299: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1300: reward = 129.00, steps = 129\n",
      "22:54:49 [DEBUG] train episode 1301: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1302: reward = 172.00, steps = 172\n",
      "22:54:49 [DEBUG] train episode 1303: reward = 31.00, steps = 31\n",
      "22:54:49 [DEBUG] train episode 1304: reward = 185.00, steps = 185\n",
      "22:54:49 [DEBUG] train episode 1305: reward = 56.00, steps = 56\n",
      "22:54:49 [DEBUG] train episode 1306: reward = 163.00, steps = 163\n",
      "22:54:49 [DEBUG] train episode 1307: reward = 156.00, steps = 156\n",
      "22:54:49 [DEBUG] train episode 1308: reward = 182.00, steps = 182\n",
      "22:54:49 [DEBUG] train episode 1309: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1310: reward = 196.00, steps = 196\n",
      "22:54:49 [DEBUG] train episode 1311: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1312: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1313: reward = 182.00, steps = 182\n",
      "22:54:50 [DEBUG] train episode 1314: reward = 170.00, steps = 170\n",
      "22:54:50 [DEBUG] train episode 1315: reward = 39.00, steps = 39\n",
      "22:54:50 [DEBUG] train episode 1316: reward = 27.00, steps = 27\n",
      "22:54:50 [DEBUG] train episode 1317: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1318: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1319: reward = 114.00, steps = 114\n",
      "22:54:50 [DEBUG] train episode 1320: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1321: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1322: reward = 119.00, steps = 119\n",
      "22:54:50 [DEBUG] train episode 1323: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1324: reward = 75.00, steps = 75\n",
      "22:54:50 [DEBUG] train episode 1325: reward = 146.00, steps = 146\n",
      "22:54:50 [DEBUG] train episode 1326: reward = 91.00, steps = 91\n",
      "22:54:50 [DEBUG] train episode 1327: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1328: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1329: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1330: reward = 141.00, steps = 141\n",
      "22:54:51 [DEBUG] train episode 1331: reward = 142.00, steps = 142\n",
      "22:54:51 [DEBUG] train episode 1332: reward = 173.00, steps = 173\n",
      "22:54:51 [DEBUG] train episode 1333: reward = 126.00, steps = 126\n",
      "22:54:51 [DEBUG] train episode 1334: reward = 86.00, steps = 86\n",
      "22:54:51 [DEBUG] train episode 1335: reward = 127.00, steps = 127\n",
      "22:54:51 [DEBUG] train episode 1336: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1337: reward = 68.00, steps = 68\n",
      "22:54:51 [DEBUG] train episode 1338: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1339: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1340: reward = 147.00, steps = 147\n",
      "22:54:51 [DEBUG] train episode 1341: reward = 199.00, steps = 199\n",
      "22:54:51 [DEBUG] train episode 1342: reward = 139.00, steps = 139\n",
      "22:54:51 [DEBUG] train episode 1343: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1344: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1345: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1346: reward = 135.00, steps = 135\n",
      "22:54:52 [DEBUG] train episode 1347: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1348: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1349: reward = 77.00, steps = 77\n",
      "22:54:52 [DEBUG] train episode 1350: reward = 161.00, steps = 161\n",
      "22:54:52 [DEBUG] train episode 1351: reward = 158.00, steps = 158\n",
      "22:54:52 [DEBUG] train episode 1352: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1353: reward = 153.00, steps = 153\n",
      "22:54:52 [DEBUG] train episode 1354: reward = 44.00, steps = 44\n",
      "22:54:52 [DEBUG] train episode 1355: reward = 156.00, steps = 156\n",
      "22:54:52 [DEBUG] train episode 1356: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1357: reward = 189.00, steps = 189\n",
      "22:54:53 [DEBUG] train episode 1358: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1359: reward = 87.00, steps = 87\n",
      "22:54:53 [DEBUG] train episode 1360: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1361: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1362: reward = 190.00, steps = 190\n",
      "22:54:53 [DEBUG] train episode 1363: reward = 161.00, steps = 161\n",
      "22:54:53 [DEBUG] train episode 1364: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1365: reward = 60.00, steps = 60\n",
      "22:54:53 [DEBUG] train episode 1366: reward = 138.00, steps = 138\n",
      "22:54:53 [DEBUG] train episode 1367: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1368: reward = 127.00, steps = 127\n",
      "22:54:53 [DEBUG] train episode 1369: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1370: reward = 117.00, steps = 117\n",
      "22:54:53 [DEBUG] train episode 1371: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1372: reward = 185.00, steps = 185\n",
      "22:54:54 [DEBUG] train episode 1373: reward = 45.00, steps = 45\n",
      "22:54:54 [DEBUG] train episode 1374: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1375: reward = 171.00, steps = 171\n",
      "22:54:54 [DEBUG] train episode 1376: reward = 175.00, steps = 175\n",
      "22:54:54 [DEBUG] train episode 1377: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1378: reward = 134.00, steps = 134\n",
      "22:54:54 [DEBUG] train episode 1379: reward = 190.00, steps = 190\n",
      "22:54:54 [DEBUG] train episode 1380: reward = 172.00, steps = 172\n",
      "22:54:54 [DEBUG] train episode 1381: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1382: reward = 103.00, steps = 103\n",
      "22:54:54 [DEBUG] train episode 1383: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1384: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1385: reward = 198.00, steps = 198\n",
      "22:54:55 [DEBUG] train episode 1386: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1387: reward = 121.00, steps = 121\n",
      "22:54:55 [DEBUG] train episode 1388: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1389: reward = 183.00, steps = 183\n",
      "22:54:55 [DEBUG] train episode 1390: reward = 124.00, steps = 124\n",
      "22:54:55 [DEBUG] train episode 1391: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1392: reward = 195.00, steps = 195\n",
      "22:54:55 [DEBUG] train episode 1393: reward = 199.00, steps = 199\n",
      "22:54:55 [DEBUG] train episode 1394: reward = 158.00, steps = 158\n",
      "22:54:55 [DEBUG] train episode 1395: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1396: reward = 132.00, steps = 132\n",
      "22:54:55 [DEBUG] train episode 1397: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1398: reward = 165.00, steps = 165\n",
      "22:54:56 [DEBUG] train episode 1399: reward = 133.00, steps = 133\n",
      "22:54:56 [DEBUG] train episode 1400: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1401: reward = 36.00, steps = 36\n",
      "22:54:56 [DEBUG] train episode 1402: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1403: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1404: reward = 111.00, steps = 111\n",
      "22:54:56 [DEBUG] train episode 1405: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1406: reward = 133.00, steps = 133\n",
      "22:54:56 [DEBUG] train episode 1407: reward = 165.00, steps = 165\n",
      "22:54:56 [DEBUG] train episode 1408: reward = 195.00, steps = 195\n",
      "22:54:56 [DEBUG] train episode 1409: reward = 137.00, steps = 137\n",
      "22:54:56 [DEBUG] train episode 1410: reward = 169.00, steps = 169\n",
      "22:54:57 [DEBUG] train episode 1411: reward = 156.00, steps = 156\n",
      "22:54:57 [DEBUG] train episode 1412: reward = 29.00, steps = 29\n",
      "22:54:57 [DEBUG] train episode 1413: reward = 183.00, steps = 183\n",
      "22:54:57 [DEBUG] train episode 1414: reward = 166.00, steps = 166\n",
      "22:54:57 [DEBUG] train episode 1415: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1416: reward = 186.00, steps = 186\n",
      "22:54:57 [DEBUG] train episode 1417: reward = 165.00, steps = 165\n",
      "22:54:57 [DEBUG] train episode 1418: reward = 177.00, steps = 177\n",
      "22:54:57 [DEBUG] train episode 1419: reward = 164.00, steps = 164\n",
      "22:54:57 [DEBUG] train episode 1420: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1421: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1422: reward = 162.00, steps = 162\n",
      "22:54:57 [DEBUG] train episode 1423: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1424: reward = 87.00, steps = 87\n",
      "22:54:58 [DEBUG] train episode 1425: reward = 174.00, steps = 174\n",
      "22:54:58 [DEBUG] train episode 1426: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1427: reward = 126.00, steps = 126\n",
      "22:54:58 [DEBUG] train episode 1428: reward = 179.00, steps = 179\n",
      "22:54:58 [DEBUG] train episode 1429: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1430: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1431: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1432: reward = 41.00, steps = 41\n",
      "22:54:58 [DEBUG] train episode 1433: reward = 185.00, steps = 185\n",
      "22:54:58 [DEBUG] train episode 1434: reward = 122.00, steps = 122\n",
      "22:54:58 [DEBUG] train episode 1435: reward = 144.00, steps = 144\n",
      "22:54:58 [DEBUG] train episode 1436: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1437: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1438: reward = 65.00, steps = 65\n",
      "22:54:59 [DEBUG] train episode 1439: reward = 152.00, steps = 152\n",
      "22:54:59 [DEBUG] train episode 1440: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1441: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1442: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1443: reward = 153.00, steps = 153\n",
      "22:54:59 [DEBUG] train episode 1444: reward = 34.00, steps = 34\n",
      "22:54:59 [DEBUG] train episode 1445: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1446: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1447: reward = 173.00, steps = 173\n",
      "22:54:59 [DEBUG] train episode 1448: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1449: reward = 127.00, steps = 127\n",
      "22:54:59 [DEBUG] train episode 1450: reward = 194.00, steps = 194\n",
      "22:55:00 [DEBUG] train episode 1451: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1452: reward = 50.00, steps = 50\n",
      "22:55:00 [DEBUG] train episode 1453: reward = 76.00, steps = 76\n",
      "22:55:00 [DEBUG] train episode 1454: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1455: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1456: reward = 168.00, steps = 168\n",
      "22:55:00 [DEBUG] train episode 1457: reward = 125.00, steps = 125\n",
      "22:55:00 [DEBUG] train episode 1458: reward = 178.00, steps = 178\n",
      "22:55:00 [DEBUG] train episode 1459: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1460: reward = 72.00, steps = 72\n",
      "22:55:00 [DEBUG] train episode 1461: reward = 82.00, steps = 82\n",
      "22:55:00 [DEBUG] train episode 1462: reward = 137.00, steps = 137\n",
      "22:55:01 [DEBUG] train episode 1463: reward = 158.00, steps = 158\n",
      "22:55:01 [DEBUG] train episode 1464: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1465: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1466: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1467: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1468: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1469: reward = 148.00, steps = 148\n",
      "22:55:01 [DEBUG] train episode 1470: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1471: reward = 156.00, steps = 156\n",
      "22:55:01 [DEBUG] train episode 1472: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1473: reward = 51.00, steps = 51\n",
      "22:55:01 [DEBUG] train episode 1474: reward = 173.00, steps = 173\n",
      "22:55:02 [DEBUG] train episode 1475: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1476: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1477: reward = 185.00, steps = 185\n",
      "22:55:02 [DEBUG] train episode 1478: reward = 90.00, steps = 90\n",
      "22:55:02 [DEBUG] train episode 1479: reward = 137.00, steps = 137\n",
      "22:55:02 [DEBUG] train episode 1480: reward = 130.00, steps = 130\n",
      "22:55:02 [DEBUG] train episode 1481: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1482: reward = 111.00, steps = 111\n",
      "22:55:02 [DEBUG] train episode 1483: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1484: reward = 111.00, steps = 111\n",
      "22:55:02 [DEBUG] train episode 1485: reward = 185.00, steps = 185\n",
      "22:55:02 [DEBUG] train episode 1486: reward = 113.00, steps = 113\n",
      "22:55:02 [DEBUG] train episode 1487: reward = 131.00, steps = 131\n",
      "22:55:02 [DEBUG] train episode 1488: reward = 176.00, steps = 176\n",
      "22:55:02 [DEBUG] train episode 1489: reward = 171.00, steps = 171\n",
      "22:55:03 [DEBUG] train episode 1490: reward = 185.00, steps = 185\n",
      "22:55:03 [DEBUG] train episode 1491: reward = 156.00, steps = 156\n",
      "22:55:03 [DEBUG] train episode 1492: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1493: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1494: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1495: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1496: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1497: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1498: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1499: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1500: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1501: reward = 159.00, steps = 159\n",
      "22:55:04 [DEBUG] train episode 1502: reward = 174.00, steps = 174\n",
      "22:55:04 [DEBUG] train episode 1503: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1504: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1505: reward = 154.00, steps = 154\n",
      "22:55:04 [DEBUG] train episode 1506: reward = 123.00, steps = 123\n",
      "22:55:04 [DEBUG] train episode 1507: reward = 191.00, steps = 191\n",
      "22:55:04 [DEBUG] train episode 1508: reward = 158.00, steps = 158\n",
      "22:55:04 [DEBUG] train episode 1509: reward = 128.00, steps = 128\n",
      "22:55:04 [DEBUG] train episode 1510: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1511: reward = 88.00, steps = 88\n",
      "22:55:04 [DEBUG] train episode 1512: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1513: reward = 187.00, steps = 187\n",
      "22:55:05 [DEBUG] train episode 1514: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1515: reward = 170.00, steps = 170\n",
      "22:55:05 [DEBUG] train episode 1516: reward = 194.00, steps = 194\n",
      "22:55:05 [DEBUG] train episode 1517: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1518: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1519: reward = 176.00, steps = 176\n",
      "22:55:05 [DEBUG] train episode 1520: reward = 142.00, steps = 142\n",
      "22:55:05 [DEBUG] train episode 1521: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1522: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1523: reward = 158.00, steps = 158\n",
      "22:55:06 [DEBUG] train episode 1524: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1525: reward = 103.00, steps = 103\n",
      "22:55:06 [DEBUG] train episode 1526: reward = 186.00, steps = 186\n",
      "22:55:06 [DEBUG] train episode 1527: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1528: reward = 144.00, steps = 144\n",
      "22:55:06 [DEBUG] train episode 1529: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1530: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1531: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1532: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1533: reward = 197.00, steps = 197\n",
      "22:55:07 [DEBUG] train episode 1534: reward = 132.00, steps = 132\n",
      "22:55:07 [DEBUG] train episode 1535: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1536: reward = 113.00, steps = 113\n",
      "22:55:07 [DEBUG] train episode 1537: reward = 143.00, steps = 143\n",
      "22:55:07 [DEBUG] train episode 1538: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1539: reward = 124.00, steps = 124\n",
      "22:55:07 [DEBUG] train episode 1540: reward = 36.00, steps = 36\n",
      "22:55:07 [DEBUG] train episode 1541: reward = 149.00, steps = 149\n",
      "22:55:07 [DEBUG] train episode 1542: reward = 147.00, steps = 147\n",
      "22:55:07 [DEBUG] train episode 1543: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1544: reward = 74.00, steps = 74\n",
      "22:55:08 [DEBUG] train episode 1545: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1546: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1547: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1548: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1549: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1550: reward = 164.00, steps = 164\n",
      "22:55:08 [DEBUG] train episode 1551: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1552: reward = 48.00, steps = 48\n",
      "22:55:08 [DEBUG] train episode 1553: reward = 120.00, steps = 120\n",
      "22:55:08 [DEBUG] train episode 1554: reward = 172.00, steps = 172\n",
      "22:55:08 [DEBUG] train episode 1555: reward = 170.00, steps = 170\n",
      "22:55:08 [DEBUG] train episode 1556: reward = 151.00, steps = 151\n",
      "22:55:09 [DEBUG] train episode 1557: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1558: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1559: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1560: reward = 154.00, steps = 154\n",
      "22:55:09 [DEBUG] train episode 1561: reward = 174.00, steps = 174\n",
      "22:55:09 [DEBUG] train episode 1562: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1563: reward = 169.00, steps = 169\n",
      "22:55:09 [DEBUG] train episode 1564: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1565: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1566: reward = 159.00, steps = 159\n",
      "22:55:09 [DEBUG] train episode 1567: reward = 163.00, steps = 163\n",
      "22:55:10 [DEBUG] train episode 1568: reward = 144.00, steps = 144\n",
      "22:55:10 [DEBUG] train episode 1569: reward = 53.00, steps = 53\n",
      "22:55:10 [DEBUG] train episode 1570: reward = 181.00, steps = 181\n",
      "22:55:10 [DEBUG] train episode 1571: reward = 36.00, steps = 36\n",
      "22:55:10 [DEBUG] train episode 1572: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1573: reward = 183.00, steps = 183\n",
      "22:55:10 [DEBUG] train episode 1574: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1575: reward = 147.00, steps = 147\n",
      "22:55:10 [DEBUG] train episode 1576: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1577: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1578: reward = 160.00, steps = 160\n",
      "22:55:10 [DEBUG] train episode 1579: reward = 94.00, steps = 94\n",
      "22:55:10 [DEBUG] train episode 1580: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1581: reward = 183.00, steps = 183\n",
      "22:55:11 [DEBUG] train episode 1582: reward = 191.00, steps = 191\n",
      "22:55:11 [DEBUG] train episode 1583: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1584: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1585: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1586: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1587: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1588: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1589: reward = 104.00, steps = 104\n",
      "22:55:11 [DEBUG] train episode 1590: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1591: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1592: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1593: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1594: reward = 37.00, steps = 37\n",
      "22:55:12 [DEBUG] train episode 1595: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1596: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1597: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1598: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1599: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1600: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1601: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1602: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1603: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1604: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1605: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1606: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1607: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1608: reward = 147.00, steps = 147\n",
      "22:55:13 [DEBUG] train episode 1609: reward = 179.00, steps = 179\n",
      "22:55:13 [DEBUG] train episode 1610: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1611: reward = 167.00, steps = 167\n",
      "22:55:13 [DEBUG] train episode 1612: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1613: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1614: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1615: reward = 109.00, steps = 109\n",
      "22:55:14 [DEBUG] train episode 1616: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1617: reward = 159.00, steps = 159\n",
      "22:55:14 [DEBUG] train episode 1618: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1619: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1620: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1621: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1622: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1623: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1624: reward = 173.00, steps = 173\n",
      "22:55:15 [DEBUG] train episode 1625: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1626: reward = 120.00, steps = 120\n",
      "22:55:15 [DEBUG] train episode 1627: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1628: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1629: reward = 144.00, steps = 144\n",
      "22:55:15 [DEBUG] train episode 1630: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1631: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1632: reward = 163.00, steps = 163\n",
      "22:55:15 [DEBUG] train episode 1633: reward = 182.00, steps = 182\n",
      "22:55:16 [DEBUG] train episode 1634: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1635: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1636: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1637: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1638: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1639: reward = 155.00, steps = 155\n",
      "22:55:16 [DEBUG] train episode 1640: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1641: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1642: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1643: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1644: reward = 175.00, steps = 175\n",
      "22:55:16 [DEBUG] train episode 1645: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1646: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1647: reward = 135.00, steps = 135\n",
      "22:55:17 [DEBUG] train episode 1648: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1649: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1650: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1651: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1652: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1653: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1654: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1655: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1656: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1657: reward = 33.00, steps = 33\n",
      "22:55:18 [DEBUG] train episode 1658: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1659: reward = 189.00, steps = 189\n",
      "22:55:18 [DEBUG] train episode 1660: reward = 96.00, steps = 96\n",
      "22:55:18 [DEBUG] train episode 1661: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1662: reward = 152.00, steps = 152\n",
      "22:55:18 [DEBUG] train episode 1663: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1664: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1665: reward = 175.00, steps = 175\n",
      "22:55:19 [DEBUG] train episode 1666: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1667: reward = 168.00, steps = 168\n",
      "22:55:19 [DEBUG] train episode 1668: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1669: reward = 194.00, steps = 194\n",
      "22:55:19 [DEBUG] train episode 1670: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1671: reward = 159.00, steps = 159\n",
      "22:55:19 [DEBUG] train episode 1672: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1673: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1674: reward = 135.00, steps = 135\n",
      "22:55:19 [DEBUG] train episode 1675: reward = 107.00, steps = 107\n",
      "22:55:20 [DEBUG] train episode 1676: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1677: reward = 31.00, steps = 31\n",
      "22:55:20 [DEBUG] train episode 1678: reward = 53.00, steps = 53\n",
      "22:55:20 [DEBUG] train episode 1679: reward = 165.00, steps = 165\n",
      "22:55:20 [DEBUG] train episode 1680: reward = 18.00, steps = 18\n",
      "22:55:20 [DEBUG] train episode 1681: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1682: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1683: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1684: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1685: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1686: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1687: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1688: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1689: reward = 165.00, steps = 165\n",
      "22:55:21 [DEBUG] train episode 1690: reward = 108.00, steps = 108\n",
      "22:55:21 [DEBUG] train episode 1691: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1692: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1693: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1694: reward = 44.00, steps = 44\n",
      "22:55:21 [DEBUG] train episode 1695: reward = 70.00, steps = 70\n",
      "22:55:21 [DEBUG] train episode 1696: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1697: reward = 79.00, steps = 79\n",
      "22:55:21 [DEBUG] train episode 1698: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1699: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1700: reward = 128.00, steps = 128\n",
      "22:55:21 [DEBUG] train episode 1701: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1702: reward = 131.00, steps = 131\n",
      "22:55:21 [DEBUG] train episode 1703: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1704: reward = 142.00, steps = 142\n",
      "22:55:22 [DEBUG] train episode 1705: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1706: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1707: reward = 68.00, steps = 68\n",
      "22:55:22 [DEBUG] train episode 1708: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1709: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1710: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1711: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1712: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1713: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1714: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1715: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1716: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1717: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1718: reward = 61.00, steps = 61\n",
      "22:55:23 [DEBUG] train episode 1719: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1720: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1721: reward = 154.00, steps = 154\n",
      "22:55:23 [DEBUG] train episode 1722: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1723: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1724: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1725: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1726: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1727: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1728: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1729: reward = 80.00, steps = 80\n",
      "22:55:24 [DEBUG] train episode 1730: reward = 182.00, steps = 182\n",
      "22:55:24 [DEBUG] train episode 1731: reward = 136.00, steps = 136\n",
      "22:55:24 [DEBUG] train episode 1732: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1733: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1734: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1735: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1736: reward = 82.00, steps = 82\n",
      "22:55:25 [DEBUG] train episode 1737: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1738: reward = 177.00, steps = 177\n",
      "22:55:25 [DEBUG] train episode 1739: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1740: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1741: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1742: reward = 193.00, steps = 193\n",
      "22:55:25 [DEBUG] train episode 1743: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1744: reward = 76.00, steps = 76\n",
      "22:55:25 [DEBUG] train episode 1745: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1746: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1747: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1748: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1749: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1750: reward = 154.00, steps = 154\n",
      "22:55:26 [DEBUG] train episode 1751: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1752: reward = 199.00, steps = 199\n",
      "22:55:26 [DEBUG] train episode 1753: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1754: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1755: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1756: reward = 71.00, steps = 71\n",
      "22:55:27 [DEBUG] train episode 1757: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1758: reward = 135.00, steps = 135\n",
      "22:55:27 [DEBUG] train episode 1759: reward = 84.00, steps = 84\n",
      "22:55:27 [DEBUG] train episode 1760: reward = 195.00, steps = 195\n",
      "22:55:27 [DEBUG] train episode 1761: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1762: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1763: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1764: reward = 173.00, steps = 173\n",
      "22:55:27 [DEBUG] train episode 1765: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1766: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1767: reward = 37.00, steps = 37\n",
      "22:55:28 [DEBUG] train episode 1768: reward = 37.00, steps = 37\n",
      "22:55:28 [DEBUG] train episode 1769: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1770: reward = 29.00, steps = 29\n",
      "22:55:28 [DEBUG] train episode 1771: reward = 145.00, steps = 145\n",
      "22:55:28 [DEBUG] train episode 1772: reward = 118.00, steps = 118\n",
      "22:55:28 [DEBUG] train episode 1773: reward = 86.00, steps = 86\n",
      "22:55:28 [DEBUG] train episode 1774: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1775: reward = 193.00, steps = 193\n",
      "22:55:28 [DEBUG] train episode 1776: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1777: reward = 109.00, steps = 109\n",
      "22:55:28 [DEBUG] train episode 1778: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1779: reward = 146.00, steps = 146\n",
      "22:55:28 [DEBUG] train episode 1780: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1781: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1782: reward = 175.00, steps = 175\n",
      "22:55:29 [DEBUG] train episode 1783: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1784: reward = 147.00, steps = 147\n",
      "22:55:29 [DEBUG] train episode 1785: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1786: reward = 181.00, steps = 181\n",
      "22:55:29 [DEBUG] train episode 1787: reward = 162.00, steps = 162\n",
      "22:55:29 [DEBUG] train episode 1788: reward = 52.00, steps = 52\n",
      "22:55:29 [DEBUG] train episode 1789: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1790: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1791: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1792: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1793: reward = 21.00, steps = 21\n",
      "22:55:30 [DEBUG] train episode 1794: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1795: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1796: reward = 133.00, steps = 133\n",
      "22:55:30 [DEBUG] train episode 1797: reward = 140.00, steps = 140\n",
      "22:55:30 [DEBUG] train episode 1798: reward = 198.00, steps = 198\n",
      "22:55:30 [DEBUG] train episode 1799: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1800: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1801: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1802: reward = 175.00, steps = 175\n",
      "22:55:30 [DEBUG] train episode 1803: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1804: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1805: reward = 58.00, steps = 58\n",
      "22:55:31 [DEBUG] train episode 1806: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1807: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1808: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1809: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1810: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1811: reward = 159.00, steps = 159\n",
      "22:55:31 [DEBUG] train episode 1812: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1813: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1814: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1815: reward = 161.00, steps = 161\n",
      "22:55:32 [DEBUG] train episode 1816: reward = 101.00, steps = 101\n",
      "22:55:32 [DEBUG] train episode 1817: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1818: reward = 137.00, steps = 137\n",
      "22:55:32 [DEBUG] train episode 1819: reward = 103.00, steps = 103\n",
      "22:55:32 [DEBUG] train episode 1820: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1821: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1822: reward = 63.00, steps = 63\n",
      "22:55:32 [DEBUG] train episode 1823: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1824: reward = 129.00, steps = 129\n",
      "22:55:33 [DEBUG] train episode 1825: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1826: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1827: reward = 36.00, steps = 36\n",
      "22:55:33 [DEBUG] train episode 1828: reward = 185.00, steps = 185\n",
      "22:55:33 [DEBUG] train episode 1829: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1830: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1831: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1832: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1833: reward = 178.00, steps = 178\n",
      "22:55:33 [DEBUG] train episode 1834: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1835: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1836: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1837: reward = 153.00, steps = 153\n",
      "22:55:34 [DEBUG] train episode 1838: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1839: reward = 180.00, steps = 180\n",
      "22:55:34 [DEBUG] train episode 1840: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1841: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1842: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1843: reward = 66.00, steps = 66\n",
      "22:55:34 [DEBUG] train episode 1844: reward = 18.00, steps = 18\n",
      "22:55:34 [DEBUG] train episode 1845: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1846: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1847: reward = 187.00, steps = 187\n",
      "22:55:35 [DEBUG] train episode 1848: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1849: reward = 100.00, steps = 100\n",
      "22:55:35 [DEBUG] train episode 1850: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1851: reward = 30.00, steps = 30\n",
      "22:55:35 [DEBUG] train episode 1852: reward = 106.00, steps = 106\n",
      "22:55:35 [DEBUG] train episode 1853: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1854: reward = 162.00, steps = 162\n",
      "22:55:35 [DEBUG] train episode 1855: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1856: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1857: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1858: reward = 23.00, steps = 23\n",
      "22:55:36 [DEBUG] train episode 1859: reward = 175.00, steps = 175\n",
      "22:55:36 [DEBUG] train episode 1860: reward = 27.00, steps = 27\n",
      "22:55:36 [DEBUG] train episode 1861: reward = 156.00, steps = 156\n",
      "22:55:36 [DEBUG] train episode 1862: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1863: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1864: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1865: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1866: reward = 155.00, steps = 155\n",
      "22:55:36 [DEBUG] train episode 1867: reward = 175.00, steps = 175\n",
      "22:55:36 [DEBUG] train episode 1868: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1869: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1870: reward = 176.00, steps = 176\n",
      "22:55:37 [DEBUG] train episode 1871: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1872: reward = 199.00, steps = 199\n",
      "22:55:37 [DEBUG] train episode 1873: reward = 179.00, steps = 179\n",
      "22:55:37 [DEBUG] train episode 1874: reward = 170.00, steps = 170\n",
      "22:55:37 [DEBUG] train episode 1875: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1876: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1877: reward = 116.00, steps = 116\n",
      "22:55:37 [DEBUG] train episode 1878: reward = 138.00, steps = 138\n",
      "22:55:37 [DEBUG] train episode 1879: reward = 19.00, steps = 19\n",
      "22:55:37 [DEBUG] train episode 1880: reward = 152.00, steps = 152\n",
      "22:55:37 [DEBUG] train episode 1881: reward = 157.00, steps = 157\n",
      "22:55:37 [DEBUG] train episode 1882: reward = 141.00, steps = 141\n",
      "22:55:38 [DEBUG] train episode 1883: reward = 133.00, steps = 133\n",
      "22:55:38 [DEBUG] train episode 1884: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1885: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1886: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1887: reward = 171.00, steps = 171\n",
      "22:55:38 [DEBUG] train episode 1888: reward = 121.00, steps = 121\n",
      "22:55:38 [DEBUG] train episode 1889: reward = 191.00, steps = 191\n",
      "22:55:38 [DEBUG] train episode 1890: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1891: reward = 172.00, steps = 172\n",
      "22:55:38 [DEBUG] train episode 1892: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1893: reward = 136.00, steps = 136\n",
      "22:55:39 [DEBUG] train episode 1894: reward = 166.00, steps = 166\n",
      "22:55:39 [DEBUG] train episode 1895: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1896: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1897: reward = 113.00, steps = 113\n",
      "22:55:39 [DEBUG] train episode 1898: reward = 162.00, steps = 162\n",
      "22:55:39 [DEBUG] train episode 1899: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1900: reward = 159.00, steps = 159\n",
      "22:55:39 [DEBUG] train episode 1901: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1902: reward = 35.00, steps = 35\n",
      "22:55:39 [DEBUG] train episode 1903: reward = 141.00, steps = 141\n",
      "22:55:39 [DEBUG] train episode 1904: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1905: reward = 185.00, steps = 185\n",
      "22:55:40 [DEBUG] train episode 1906: reward = 29.00, steps = 29\n",
      "22:55:40 [DEBUG] train episode 1907: reward = 136.00, steps = 136\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:55:40 [DEBUG] train episode 1908: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1909: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1910: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1911: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1912: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1913: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1914: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1915: reward = 140.00, steps = 140\n",
      "22:55:41 [DEBUG] train episode 1916: reward = 155.00, steps = 155\n",
      "22:55:41 [DEBUG] train episode 1917: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1918: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1919: reward = 74.00, steps = 74\n",
      "22:55:41 [DEBUG] train episode 1920: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1921: reward = 84.00, steps = 84\n",
      "22:55:41 [DEBUG] train episode 1922: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1923: reward = 160.00, steps = 160\n",
      "22:55:41 [DEBUG] train episode 1924: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1925: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1926: reward = 101.00, steps = 101\n",
      "22:55:42 [DEBUG] train episode 1927: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1928: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1929: reward = 188.00, steps = 188\n",
      "22:55:42 [DEBUG] train episode 1930: reward = 155.00, steps = 155\n",
      "22:55:42 [DEBUG] train episode 1931: reward = 178.00, steps = 178\n",
      "22:55:42 [DEBUG] train episode 1932: reward = 32.00, steps = 32\n",
      "22:55:42 [DEBUG] train episode 1933: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1934: reward = 118.00, steps = 118\n",
      "22:55:42 [DEBUG] train episode 1935: reward = 177.00, steps = 177\n",
      "22:55:42 [DEBUG] train episode 1936: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1937: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1938: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1939: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1940: reward = 154.00, steps = 154\n",
      "22:55:43 [DEBUG] train episode 1941: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1942: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1943: reward = 160.00, steps = 160\n",
      "22:55:43 [DEBUG] train episode 1944: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1945: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1946: reward = 197.00, steps = 197\n",
      "22:55:43 [DEBUG] train episode 1947: reward = 120.00, steps = 120\n",
      "22:55:43 [DEBUG] train episode 1948: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1949: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1950: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1951: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1952: reward = 132.00, steps = 132\n",
      "22:55:44 [DEBUG] train episode 1953: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1954: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1955: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1956: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1957: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1958: reward = 142.00, steps = 142\n",
      "22:55:44 [DEBUG] train episode 1959: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1960: reward = 174.00, steps = 174\n",
      "22:55:45 [DEBUG] train episode 1961: reward = 34.00, steps = 34\n",
      "22:55:45 [DEBUG] train episode 1962: reward = 101.00, steps = 101\n",
      "22:55:45 [DEBUG] train episode 1963: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1964: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1965: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1966: reward = 163.00, steps = 163\n",
      "22:55:45 [DEBUG] train episode 1967: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1968: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1969: reward = 174.00, steps = 174\n",
      "22:55:45 [DEBUG] train episode 1970: reward = 167.00, steps = 167\n",
      "22:55:45 [DEBUG] train episode 1971: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1972: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1973: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1974: reward = 190.00, steps = 190\n",
      "22:55:46 [DEBUG] train episode 1975: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1976: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1977: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1978: reward = 113.00, steps = 113\n",
      "22:55:46 [DEBUG] train episode 1979: reward = 190.00, steps = 190\n",
      "22:55:46 [DEBUG] train episode 1980: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1981: reward = 186.00, steps = 186\n",
      "22:55:47 [DEBUG] train episode 1982: reward = 179.00, steps = 179\n",
      "22:55:47 [DEBUG] train episode 1983: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1984: reward = 196.00, steps = 196\n",
      "22:55:47 [DEBUG] train episode 1985: reward = 127.00, steps = 127\n",
      "22:55:47 [DEBUG] train episode 1986: reward = 188.00, steps = 188\n",
      "22:55:47 [DEBUG] train episode 1987: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1988: reward = 175.00, steps = 175\n",
      "22:55:47 [DEBUG] train episode 1989: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1990: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1991: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1992: reward = 181.00, steps = 181\n",
      "22:55:48 [DEBUG] train episode 1993: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1994: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1995: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1996: reward = 58.00, steps = 58\n",
      "22:55:48 [DEBUG] train episode 1997: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1998: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1999: reward = 35.00, steps = 35\n",
      "22:55:48 [DEBUG] train episode 2000: reward = 29.00, steps = 29\n",
      "22:55:48 [DEBUG] train episode 2001: reward = 124.00, steps = 124\n",
      "22:55:48 [DEBUG] train episode 2002: reward = 170.00, steps = 170\n",
      "22:55:49 [DEBUG] train episode 2003: reward = 140.00, steps = 140\n",
      "22:55:49 [DEBUG] train episode 2004: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2005: reward = 165.00, steps = 165\n",
      "22:55:49 [DEBUG] train episode 2006: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2007: reward = 117.00, steps = 117\n",
      "22:55:49 [DEBUG] train episode 2008: reward = 172.00, steps = 172\n",
      "22:55:49 [DEBUG] train episode 2009: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2010: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2011: reward = 183.00, steps = 183\n",
      "22:55:49 [DEBUG] train episode 2012: reward = 145.00, steps = 145\n",
      "22:55:50 [DEBUG] train episode 2013: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2014: reward = 26.00, steps = 26\n",
      "22:55:50 [DEBUG] train episode 2015: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2016: reward = 179.00, steps = 179\n",
      "22:55:50 [DEBUG] train episode 2017: reward = 192.00, steps = 192\n",
      "22:55:50 [DEBUG] train episode 2018: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2019: reward = 175.00, steps = 175\n",
      "22:55:50 [DEBUG] train episode 2020: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2021: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2022: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2023: reward = 20.00, steps = 20\n",
      "22:55:51 [DEBUG] train episode 2024: reward = 158.00, steps = 158\n",
      "22:55:51 [DEBUG] train episode 2025: reward = 114.00, steps = 114\n",
      "22:55:51 [DEBUG] train episode 2026: reward = 121.00, steps = 121\n",
      "22:55:51 [DEBUG] train episode 2027: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2028: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2029: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2030: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2031: reward = 198.00, steps = 198\n",
      "22:55:51 [DEBUG] train episode 2032: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:55:51 [DEBUG] train episode 2033: reward = 129.00, steps = 129\n",
      "22:55:52 [DEBUG] train episode 2034: reward = 195.00, steps = 195\n",
      "22:55:52 [DEBUG] train episode 2035: reward = 67.00, steps = 67\n",
      "22:55:52 [DEBUG] train episode 2036: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2037: reward = 177.00, steps = 177\n",
      "22:55:52 [DEBUG] train episode 2038: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2039: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2040: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2041: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2042: reward = 199.00, steps = 199\n",
      "22:55:52 [DEBUG] train episode 2043: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2044: reward = 62.00, steps = 62\n",
      "22:55:52 [DEBUG] train episode 2045: reward = 180.00, steps = 180\n",
      "22:55:53 [DEBUG] train episode 2046: reward = 193.00, steps = 193\n",
      "22:55:53 [DEBUG] train episode 2047: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2048: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2049: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2050: reward = 16.00, steps = 16\n",
      "22:55:53 [DEBUG] train episode 2051: reward = 117.00, steps = 117\n",
      "22:55:53 [DEBUG] train episode 2052: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2053: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2054: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2055: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2056: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2057: reward = 138.00, steps = 138\n",
      "22:55:54 [DEBUG] train episode 2058: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2059: reward = 139.00, steps = 139\n",
      "22:55:54 [DEBUG] train episode 2060: reward = 197.00, steps = 197\n",
      "22:55:54 [DEBUG] train episode 2061: reward = 67.00, steps = 67\n",
      "22:55:54 [DEBUG] train episode 2062: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2063: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2064: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2065: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2066: reward = 180.00, steps = 180\n",
      "22:55:54 [DEBUG] train episode 2067: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2068: reward = 180.00, steps = 180\n",
      "22:55:55 [DEBUG] train episode 2069: reward = 173.00, steps = 173\n",
      "22:55:55 [DEBUG] train episode 2070: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2071: reward = 167.00, steps = 167\n",
      "22:55:55 [DEBUG] train episode 2072: reward = 198.00, steps = 198\n",
      "22:55:55 [DEBUG] train episode 2073: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2074: reward = 183.00, steps = 183\n",
      "22:55:55 [DEBUG] train episode 2075: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2076: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2077: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2078: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2079: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2080: reward = 183.00, steps = 183\n",
      "22:55:56 [DEBUG] train episode 2081: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2082: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2083: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2084: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2085: reward = 178.00, steps = 178\n",
      "22:55:56 [DEBUG] train episode 2086: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2087: reward = 182.00, steps = 182\n",
      "22:55:57 [DEBUG] train episode 2088: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2089: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2090: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2091: reward = 193.00, steps = 193\n",
      "22:55:57 [DEBUG] train episode 2092: reward = 176.00, steps = 176\n",
      "22:55:57 [DEBUG] train episode 2093: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2094: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2095: reward = 63.00, steps = 63\n",
      "22:55:57 [DEBUG] train episode 2096: reward = 106.00, steps = 106\n",
      "22:55:57 [DEBUG] train episode 2097: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2098: reward = 141.00, steps = 141\n",
      "22:55:58 [DEBUG] train episode 2099: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2100: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2101: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2102: reward = 170.00, steps = 170\n",
      "22:55:58 [DEBUG] train episode 2103: reward = 121.00, steps = 121\n",
      "22:55:58 [DEBUG] train episode 2104: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2105: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2106: reward = 192.00, steps = 192\n",
      "22:55:58 [DEBUG] train episode 2107: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2108: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2109: reward = 190.00, steps = 190\n",
      "22:55:59 [DEBUG] train episode 2110: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2111: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2112: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2113: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2114: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2115: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2116: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2117: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2118: reward = 114.00, steps = 114\n",
      "22:56:00 [DEBUG] train episode 2119: reward = 199.00, steps = 199\n",
      "22:56:00 [DEBUG] train episode 2120: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2121: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2122: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2123: reward = 151.00, steps = 151\n",
      "22:56:00 [DEBUG] train episode 2124: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2125: reward = 114.00, steps = 114\n",
      "22:56:00 [DEBUG] train episode 2126: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2127: reward = 152.00, steps = 152\n",
      "22:56:01 [DEBUG] train episode 2128: reward = 191.00, steps = 191\n",
      "22:56:01 [DEBUG] train episode 2129: reward = 166.00, steps = 166\n",
      "22:56:01 [DEBUG] train episode 2130: reward = 143.00, steps = 143\n",
      "22:56:01 [DEBUG] train episode 2131: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2132: reward = 141.00, steps = 141\n",
      "22:56:01 [DEBUG] train episode 2133: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2134: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2135: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2136: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2137: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2138: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2139: reward = 164.00, steps = 164\n",
      "22:56:02 [DEBUG] train episode 2140: reward = 125.00, steps = 125\n",
      "22:56:02 [DEBUG] train episode 2141: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2142: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2143: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2144: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2145: reward = 113.00, steps = 113\n",
      "22:56:02 [DEBUG] train episode 2146: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2147: reward = 191.00, steps = 191\n",
      "22:56:02 [DEBUG] train episode 2148: reward = 140.00, steps = 140\n",
      "22:56:02 [DEBUG] train episode 2149: reward = 75.00, steps = 75\n",
      "22:56:02 [DEBUG] train episode 2150: reward = 177.00, steps = 177\n",
      "22:56:03 [DEBUG] train episode 2151: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2152: reward = 187.00, steps = 187\n",
      "22:56:03 [DEBUG] train episode 2153: reward = 68.00, steps = 68\n",
      "22:56:03 [DEBUG] train episode 2154: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2155: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2156: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2157: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:03 [DEBUG] train episode 2158: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2159: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2160: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2161: reward = 172.00, steps = 172\n",
      "22:56:04 [DEBUG] train episode 2162: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2163: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2164: reward = 177.00, steps = 177\n",
      "22:56:04 [DEBUG] train episode 2165: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2166: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2167: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2168: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2169: reward = 187.00, steps = 187\n",
      "22:56:05 [DEBUG] train episode 2170: reward = 164.00, steps = 164\n",
      "22:56:05 [DEBUG] train episode 2171: reward = 165.00, steps = 165\n",
      "22:56:05 [DEBUG] train episode 2172: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2173: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2174: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2175: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2176: reward = 80.00, steps = 80\n",
      "22:56:05 [DEBUG] train episode 2177: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2178: reward = 172.00, steps = 172\n",
      "22:56:05 [DEBUG] train episode 2179: reward = 94.00, steps = 94\n",
      "22:56:06 [DEBUG] train episode 2180: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2181: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2182: reward = 182.00, steps = 182\n",
      "22:56:06 [DEBUG] train episode 2183: reward = 190.00, steps = 190\n",
      "22:56:06 [DEBUG] train episode 2184: reward = 80.00, steps = 80\n",
      "22:56:06 [DEBUG] train episode 2185: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2186: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2187: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2188: reward = 199.00, steps = 199\n",
      "22:56:06 [DEBUG] train episode 2189: reward = 172.00, steps = 172\n",
      "22:56:07 [DEBUG] train episode 2190: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2191: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2192: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2193: reward = 188.00, steps = 188\n",
      "22:56:07 [DEBUG] train episode 2194: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2195: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2196: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2197: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2198: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2199: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2200: reward = 190.00, steps = 190\n",
      "22:56:08 [DEBUG] train episode 2201: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2202: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2203: reward = 140.00, steps = 140\n",
      "22:56:08 [DEBUG] train episode 2204: reward = 178.00, steps = 178\n",
      "22:56:08 [DEBUG] train episode 2205: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2206: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2207: reward = 145.00, steps = 145\n",
      "22:56:08 [DEBUG] train episode 2208: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2209: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2210: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2211: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2212: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2213: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2214: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2215: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2216: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2217: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2218: reward = 126.00, steps = 126\n",
      "22:56:10 [DEBUG] train episode 2219: reward = 192.00, steps = 192\n",
      "22:56:10 [DEBUG] train episode 2220: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2221: reward = 183.00, steps = 183\n",
      "22:56:10 [DEBUG] train episode 2222: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2223: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2224: reward = 130.00, steps = 130\n",
      "22:56:11 [DEBUG] train episode 2225: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2226: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2227: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2228: reward = 132.00, steps = 132\n",
      "22:56:11 [DEBUG] train episode 2229: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2230: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2231: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2232: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2233: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2234: reward = 27.00, steps = 27\n",
      "22:56:11 [DEBUG] train episode 2235: reward = 148.00, steps = 148\n",
      "22:56:12 [DEBUG] train episode 2236: reward = 142.00, steps = 142\n",
      "22:56:12 [DEBUG] train episode 2237: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2238: reward = 194.00, steps = 194\n",
      "22:56:12 [DEBUG] train episode 2239: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2240: reward = 83.00, steps = 83\n",
      "22:56:12 [DEBUG] train episode 2241: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2242: reward = 158.00, steps = 158\n",
      "22:56:12 [DEBUG] train episode 2243: reward = 180.00, steps = 180\n",
      "22:56:12 [DEBUG] train episode 2244: reward = 164.00, steps = 164\n",
      "22:56:12 [DEBUG] train episode 2245: reward = 136.00, steps = 136\n",
      "22:56:13 [DEBUG] train episode 2246: reward = 184.00, steps = 184\n",
      "22:56:13 [DEBUG] train episode 2247: reward = 154.00, steps = 154\n",
      "22:56:13 [DEBUG] train episode 2248: reward = 154.00, steps = 154\n",
      "22:56:13 [DEBUG] train episode 2249: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2250: reward = 190.00, steps = 190\n",
      "22:56:13 [DEBUG] train episode 2251: reward = 150.00, steps = 150\n",
      "22:56:13 [DEBUG] train episode 2252: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2253: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2254: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2255: reward = 152.00, steps = 152\n",
      "22:56:14 [DEBUG] train episode 2256: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2257: reward = 16.00, steps = 16\n",
      "22:56:14 [DEBUG] train episode 2258: reward = 178.00, steps = 178\n",
      "22:56:14 [DEBUG] train episode 2259: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2260: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2261: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2262: reward = 160.00, steps = 160\n",
      "22:56:14 [DEBUG] train episode 2263: reward = 32.00, steps = 32\n",
      "22:56:14 [DEBUG] train episode 2264: reward = 76.00, steps = 76\n",
      "22:56:14 [DEBUG] train episode 2265: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2266: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2267: reward = 171.00, steps = 171\n",
      "22:56:15 [DEBUG] train episode 2268: reward = 147.00, steps = 147\n",
      "22:56:15 [DEBUG] train episode 2269: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2270: reward = 87.00, steps = 87\n",
      "22:56:15 [DEBUG] train episode 2271: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2272: reward = 185.00, steps = 185\n",
      "22:56:15 [DEBUG] train episode 2273: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2274: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2275: reward = 167.00, steps = 167\n",
      "22:56:16 [DEBUG] train episode 2276: reward = 186.00, steps = 186\n",
      "22:56:16 [DEBUG] train episode 2277: reward = 160.00, steps = 160\n",
      "22:56:16 [DEBUG] train episode 2278: reward = 156.00, steps = 156\n",
      "22:56:16 [DEBUG] train episode 2279: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2280: reward = 185.00, steps = 185\n",
      "22:56:16 [DEBUG] train episode 2281: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2282: reward = 147.00, steps = 147\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:16 [DEBUG] train episode 2283: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2284: reward = 132.00, steps = 132\n",
      "22:56:17 [DEBUG] train episode 2285: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2286: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2287: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2288: reward = 124.00, steps = 124\n",
      "22:56:17 [DEBUG] train episode 2289: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2290: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2291: reward = 199.00, steps = 199\n",
      "22:56:17 [DEBUG] train episode 2292: reward = 126.00, steps = 126\n",
      "22:56:17 [DEBUG] train episode 2293: reward = 178.00, steps = 178\n",
      "22:56:17 [DEBUG] train episode 2294: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2295: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2296: reward = 182.00, steps = 182\n",
      "22:56:18 [DEBUG] train episode 2297: reward = 182.00, steps = 182\n",
      "22:56:18 [DEBUG] train episode 2298: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2299: reward = 132.00, steps = 132\n",
      "22:56:18 [DEBUG] train episode 2300: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2301: reward = 119.00, steps = 119\n",
      "22:56:18 [DEBUG] train episode 2302: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2303: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2304: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2305: reward = 178.00, steps = 178\n",
      "22:56:19 [DEBUG] train episode 2306: reward = 133.00, steps = 133\n",
      "22:56:19 [DEBUG] train episode 2307: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2308: reward = 173.00, steps = 173\n",
      "22:56:19 [DEBUG] train episode 2309: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2310: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2311: reward = 163.00, steps = 163\n",
      "22:56:19 [DEBUG] train episode 2312: reward = 191.00, steps = 191\n",
      "22:56:19 [DEBUG] train episode 2313: reward = 169.00, steps = 169\n",
      "22:56:20 [DEBUG] train episode 2314: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2315: reward = 180.00, steps = 180\n",
      "22:56:20 [DEBUG] train episode 2316: reward = 185.00, steps = 185\n",
      "22:56:20 [DEBUG] train episode 2317: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2318: reward = 185.00, steps = 185\n",
      "22:56:20 [DEBUG] train episode 2319: reward = 168.00, steps = 168\n",
      "22:56:20 [DEBUG] train episode 2320: reward = 42.00, steps = 42\n",
      "22:56:20 [DEBUG] train episode 2321: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2322: reward = 127.00, steps = 127\n",
      "22:56:20 [DEBUG] train episode 2323: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2324: reward = 178.00, steps = 178\n",
      "22:56:21 [DEBUG] train episode 2325: reward = 156.00, steps = 156\n",
      "22:56:21 [DEBUG] train episode 2326: reward = 142.00, steps = 142\n",
      "22:56:21 [DEBUG] train episode 2327: reward = 75.00, steps = 75\n",
      "22:56:21 [DEBUG] train episode 2328: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2329: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2330: reward = 165.00, steps = 165\n",
      "22:56:21 [DEBUG] train episode 2331: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2332: reward = 195.00, steps = 195\n",
      "22:56:21 [DEBUG] train episode 2333: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2334: reward = 177.00, steps = 177\n",
      "22:56:22 [DEBUG] train episode 2335: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2336: reward = 154.00, steps = 154\n",
      "22:56:22 [DEBUG] train episode 2337: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2338: reward = 157.00, steps = 157\n",
      "22:56:22 [DEBUG] train episode 2339: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2340: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2341: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2342: reward = 178.00, steps = 178\n",
      "22:56:22 [DEBUG] train episode 2343: reward = 138.00, steps = 138\n",
      "22:56:22 [DEBUG] train episode 2344: reward = 127.00, steps = 127\n",
      "22:56:23 [DEBUG] train episode 2345: reward = 97.00, steps = 97\n",
      "22:56:23 [DEBUG] train episode 2346: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2347: reward = 97.00, steps = 97\n",
      "22:56:23 [DEBUG] train episode 2348: reward = 156.00, steps = 156\n",
      "22:56:23 [DEBUG] train episode 2349: reward = 185.00, steps = 185\n",
      "22:56:23 [DEBUG] train episode 2350: reward = 176.00, steps = 176\n",
      "22:56:23 [DEBUG] train episode 2351: reward = 39.00, steps = 39\n",
      "22:56:23 [DEBUG] train episode 2352: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2353: reward = 157.00, steps = 157\n",
      "22:56:23 [DEBUG] train episode 2354: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2355: reward = 183.00, steps = 183\n",
      "22:56:24 [DEBUG] train episode 2356: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2357: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2358: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2359: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2360: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2361: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2362: reward = 120.00, steps = 120\n",
      "22:56:24 [DEBUG] train episode 2363: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2364: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2365: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2366: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2367: reward = 141.00, steps = 141\n",
      "22:56:25 [DEBUG] train episode 2368: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2369: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2370: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2371: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2372: reward = 187.00, steps = 187\n",
      "22:56:25 [DEBUG] train episode 2373: reward = 188.00, steps = 188\n",
      "22:56:25 [DEBUG] train episode 2374: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2375: reward = 186.00, steps = 186\n",
      "22:56:26 [DEBUG] train episode 2376: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2377: reward = 109.00, steps = 109\n",
      "22:56:26 [DEBUG] train episode 2378: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2379: reward = 164.00, steps = 164\n",
      "22:56:26 [DEBUG] train episode 2380: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2381: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2382: reward = 129.00, steps = 129\n",
      "22:56:26 [DEBUG] train episode 2383: reward = 166.00, steps = 166\n",
      "22:56:26 [DEBUG] train episode 2384: reward = 123.00, steps = 123\n",
      "22:56:26 [DEBUG] train episode 2385: reward = 65.00, steps = 65\n",
      "22:56:27 [DEBUG] train episode 2386: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2387: reward = 143.00, steps = 143\n",
      "22:56:27 [DEBUG] train episode 2388: reward = 182.00, steps = 182\n",
      "22:56:27 [DEBUG] train episode 2389: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2390: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2391: reward = 166.00, steps = 166\n",
      "22:56:27 [DEBUG] train episode 2392: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2393: reward = 185.00, steps = 185\n",
      "22:56:27 [DEBUG] train episode 2394: reward = 124.00, steps = 124\n",
      "22:56:28 [DEBUG] train episode 2395: reward = 192.00, steps = 192\n",
      "22:56:28 [DEBUG] train episode 2396: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2397: reward = 32.00, steps = 32\n",
      "22:56:28 [DEBUG] train episode 2398: reward = 183.00, steps = 183\n",
      "22:56:28 [DEBUG] train episode 2399: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2400: reward = 81.00, steps = 81\n",
      "22:56:28 [DEBUG] train episode 2401: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2402: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2403: reward = 172.00, steps = 172\n",
      "22:56:28 [DEBUG] train episode 2404: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2405: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2406: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2407: reward = 131.00, steps = 131\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:29 [DEBUG] train episode 2408: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2409: reward = 136.00, steps = 136\n",
      "22:56:29 [DEBUG] train episode 2410: reward = 144.00, steps = 144\n",
      "22:56:29 [DEBUG] train episode 2411: reward = 125.00, steps = 125\n",
      "22:56:29 [DEBUG] train episode 2412: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2413: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2414: reward = 138.00, steps = 138\n",
      "22:56:29 [DEBUG] train episode 2415: reward = 65.00, steps = 65\n",
      "22:56:30 [DEBUG] train episode 2416: reward = 193.00, steps = 193\n",
      "22:56:30 [DEBUG] train episode 2417: reward = 148.00, steps = 148\n",
      "22:56:30 [DEBUG] train episode 2418: reward = 187.00, steps = 187\n",
      "22:56:30 [DEBUG] train episode 2419: reward = 200.00, steps = 200\n",
      "22:56:30 [DEBUG] train episode 2420: reward = 160.00, steps = 160\n",
      "22:56:30 [DEBUG] train episode 2421: reward = 161.00, steps = 161\n",
      "22:56:30 [DEBUG] train episode 2422: reward = 124.00, steps = 124\n",
      "22:56:30 [DEBUG] train episode 2423: reward = 199.00, steps = 199\n",
      "22:56:30 [DEBUG] train episode 2424: reward = 187.00, steps = 187\n",
      "22:56:30 [DEBUG] train episode 2425: reward = 134.00, steps = 134\n",
      "22:56:31 [DEBUG] train episode 2426: reward = 121.00, steps = 121\n",
      "22:56:31 [DEBUG] train episode 2427: reward = 111.00, steps = 111\n",
      "22:56:31 [DEBUG] train episode 2428: reward = 169.00, steps = 169\n",
      "22:56:31 [DEBUG] train episode 2429: reward = 99.00, steps = 99\n",
      "22:56:31 [DEBUG] train episode 2430: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2431: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2432: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2433: reward = 152.00, steps = 152\n",
      "22:56:31 [DEBUG] train episode 2434: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2435: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2436: reward = 189.00, steps = 189\n",
      "22:56:32 [DEBUG] train episode 2437: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2438: reward = 161.00, steps = 161\n",
      "22:56:32 [DEBUG] train episode 2439: reward = 189.00, steps = 189\n",
      "22:56:32 [DEBUG] train episode 2440: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2441: reward = 167.00, steps = 167\n",
      "22:56:32 [DEBUG] train episode 2442: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2443: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2444: reward = 129.00, steps = 129\n",
      "22:56:32 [DEBUG] train episode 2445: reward = 144.00, steps = 144\n",
      "22:56:33 [DEBUG] train episode 2446: reward = 197.00, steps = 197\n",
      "22:56:33 [DEBUG] train episode 2447: reward = 181.00, steps = 181\n",
      "22:56:33 [DEBUG] train episode 2448: reward = 160.00, steps = 160\n",
      "22:56:33 [DEBUG] train episode 2449: reward = 108.00, steps = 108\n",
      "22:56:33 [DEBUG] train episode 2450: reward = 155.00, steps = 155\n",
      "22:56:33 [DEBUG] train episode 2451: reward = 186.00, steps = 186\n",
      "22:56:33 [DEBUG] train episode 2452: reward = 68.00, steps = 68\n",
      "22:56:33 [DEBUG] train episode 2453: reward = 137.00, steps = 137\n",
      "22:56:33 [DEBUG] train episode 2454: reward = 137.00, steps = 137\n",
      "22:56:33 [DEBUG] train episode 2455: reward = 200.00, steps = 200\n",
      "22:56:33 [DEBUG] train episode 2456: reward = 142.00, steps = 142\n",
      "22:56:33 [DEBUG] train episode 2457: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2458: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2459: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2460: reward = 169.00, steps = 169\n",
      "22:56:34 [DEBUG] train episode 2461: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2462: reward = 179.00, steps = 179\n",
      "22:56:34 [DEBUG] train episode 2463: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2464: reward = 196.00, steps = 196\n",
      "22:56:34 [DEBUG] train episode 2465: reward = 112.00, steps = 112\n",
      "22:56:34 [DEBUG] train episode 2466: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2467: reward = 145.00, steps = 145\n",
      "22:56:34 [DEBUG] train episode 2468: reward = 129.00, steps = 129\n",
      "22:56:35 [DEBUG] train episode 2469: reward = 130.00, steps = 130\n",
      "22:56:35 [DEBUG] train episode 2470: reward = 157.00, steps = 157\n",
      "22:56:35 [DEBUG] train episode 2471: reward = 165.00, steps = 165\n",
      "22:56:35 [DEBUG] train episode 2472: reward = 158.00, steps = 158\n",
      "22:56:35 [DEBUG] train episode 2473: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2474: reward = 187.00, steps = 187\n",
      "22:56:35 [DEBUG] train episode 2475: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2476: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2477: reward = 168.00, steps = 168\n",
      "22:56:36 [DEBUG] train episode 2478: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2479: reward = 148.00, steps = 148\n",
      "22:56:36 [DEBUG] train episode 2480: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2481: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2482: reward = 196.00, steps = 196\n",
      "22:56:36 [DEBUG] train episode 2483: reward = 183.00, steps = 183\n",
      "22:56:36 [DEBUG] train episode 2484: reward = 163.00, steps = 163\n",
      "22:56:36 [DEBUG] train episode 2485: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2486: reward = 198.00, steps = 198\n",
      "22:56:36 [DEBUG] train episode 2487: reward = 155.00, steps = 155\n",
      "22:56:36 [DEBUG] train episode 2488: reward = 156.00, steps = 156\n",
      "22:56:36 [DEBUG] train episode 2489: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2490: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2491: reward = 142.00, steps = 142\n",
      "22:56:37 [DEBUG] train episode 2492: reward = 155.00, steps = 155\n",
      "22:56:37 [DEBUG] train episode 2493: reward = 146.00, steps = 146\n",
      "22:56:37 [DEBUG] train episode 2494: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2495: reward = 182.00, steps = 182\n",
      "22:56:37 [DEBUG] train episode 2496: reward = 139.00, steps = 139\n",
      "22:56:37 [DEBUG] train episode 2497: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2498: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2499: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2500: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2501: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2502: reward = 192.00, steps = 192\n",
      "22:56:38 [DEBUG] train episode 2503: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2504: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2505: reward = 190.00, steps = 190\n",
      "22:56:38 [DEBUG] train episode 2506: reward = 156.00, steps = 156\n",
      "22:56:39 [DEBUG] train episode 2507: reward = 136.00, steps = 136\n",
      "22:56:39 [DEBUG] train episode 2508: reward = 180.00, steps = 180\n",
      "22:56:39 [DEBUG] train episode 2509: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2510: reward = 148.00, steps = 148\n",
      "22:56:39 [DEBUG] train episode 2511: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2512: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2513: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2514: reward = 174.00, steps = 174\n",
      "22:56:39 [DEBUG] train episode 2515: reward = 166.00, steps = 166\n",
      "22:56:40 [DEBUG] train episode 2516: reward = 196.00, steps = 196\n",
      "22:56:40 [DEBUG] train episode 2517: reward = 127.00, steps = 127\n",
      "22:56:40 [DEBUG] train episode 2518: reward = 187.00, steps = 187\n",
      "22:56:40 [DEBUG] train episode 2519: reward = 73.00, steps = 73\n",
      "22:56:40 [DEBUG] train episode 2520: reward = 169.00, steps = 169\n",
      "22:56:40 [DEBUG] train episode 2521: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2522: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2523: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2524: reward = 156.00, steps = 156\n",
      "22:56:41 [DEBUG] train episode 2525: reward = 179.00, steps = 179\n",
      "22:56:41 [DEBUG] train episode 2526: reward = 200.00, steps = 200\n",
      "22:56:41 [DEBUG] train episode 2527: reward = 126.00, steps = 126\n",
      "22:56:41 [DEBUG] train episode 2528: reward = 142.00, steps = 142\n",
      "22:56:41 [DEBUG] train episode 2529: reward = 197.00, steps = 197\n",
      "22:56:41 [DEBUG] train episode 2530: reward = 161.00, steps = 161\n",
      "22:56:41 [DEBUG] train episode 2531: reward = 187.00, steps = 187\n",
      "22:56:41 [DEBUG] train episode 2532: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:42 [DEBUG] train episode 2533: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2534: reward = 181.00, steps = 181\n",
      "22:56:42 [DEBUG] train episode 2535: reward = 175.00, steps = 175\n",
      "22:56:42 [DEBUG] train episode 2536: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2537: reward = 76.00, steps = 76\n",
      "22:56:42 [DEBUG] train episode 2538: reward = 192.00, steps = 192\n",
      "22:56:42 [DEBUG] train episode 2539: reward = 118.00, steps = 118\n",
      "22:56:42 [DEBUG] train episode 2540: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2541: reward = 168.00, steps = 168\n",
      "22:56:43 [DEBUG] train episode 2542: reward = 178.00, steps = 178\n",
      "22:56:43 [DEBUG] train episode 2543: reward = 172.00, steps = 172\n",
      "22:56:43 [DEBUG] train episode 2544: reward = 138.00, steps = 138\n",
      "22:56:43 [DEBUG] train episode 2545: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2546: reward = 176.00, steps = 176\n",
      "22:56:43 [DEBUG] train episode 2547: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2548: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2549: reward = 162.00, steps = 162\n",
      "22:56:44 [DEBUG] train episode 2550: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2551: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2552: reward = 189.00, steps = 189\n",
      "22:56:44 [DEBUG] train episode 2553: reward = 157.00, steps = 157\n",
      "22:56:44 [DEBUG] train episode 2554: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2555: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2556: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2557: reward = 187.00, steps = 187\n",
      "22:56:45 [DEBUG] train episode 2558: reward = 156.00, steps = 156\n",
      "22:56:45 [DEBUG] train episode 2559: reward = 130.00, steps = 130\n",
      "22:56:45 [DEBUG] train episode 2560: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2561: reward = 40.00, steps = 40\n",
      "22:56:45 [DEBUG] train episode 2562: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2563: reward = 194.00, steps = 194\n",
      "22:56:45 [DEBUG] train episode 2564: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2565: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2566: reward = 173.00, steps = 173\n",
      "22:56:46 [DEBUG] train episode 2567: reward = 149.00, steps = 149\n",
      "22:56:46 [DEBUG] train episode 2568: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2569: reward = 136.00, steps = 136\n",
      "22:56:46 [DEBUG] train episode 2570: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2571: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2572: reward = 153.00, steps = 153\n",
      "22:56:46 [DEBUG] train episode 2573: reward = 165.00, steps = 165\n",
      "22:56:46 [DEBUG] train episode 2574: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2575: reward = 200.00, steps = 200\n",
      "22:56:47 [DEBUG] train episode 2576: reward = 179.00, steps = 179\n",
      "22:56:47 [DEBUG] train episode 2577: reward = 176.00, steps = 176\n",
      "22:56:47 [DEBUG] train episode 2578: reward = 180.00, steps = 180\n",
      "22:56:47 [DEBUG] train episode 2579: reward = 119.00, steps = 119\n",
      "22:56:47 [DEBUG] train episode 2580: reward = 123.00, steps = 123\n",
      "22:56:47 [DEBUG] train episode 2581: reward = 186.00, steps = 186\n",
      "22:56:47 [DEBUG] train episode 2582: reward = 200.00, steps = 200\n",
      "22:56:47 [DEBUG] train episode 2583: reward = 180.00, steps = 180\n",
      "22:56:47 [DEBUG] train episode 2584: reward = 150.00, steps = 150\n",
      "22:56:47 [DEBUG] train episode 2585: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2586: reward = 158.00, steps = 158\n",
      "22:56:48 [DEBUG] train episode 2587: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2588: reward = 180.00, steps = 180\n",
      "22:56:48 [DEBUG] train episode 2589: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2590: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2591: reward = 189.00, steps = 189\n",
      "22:56:48 [DEBUG] train episode 2592: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2593: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2594: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2595: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2596: reward = 146.00, steps = 146\n",
      "22:56:49 [DEBUG] train episode 2597: reward = 177.00, steps = 177\n",
      "22:56:49 [DEBUG] train episode 2598: reward = 179.00, steps = 179\n",
      "22:56:49 [DEBUG] train episode 2599: reward = 136.00, steps = 136\n",
      "22:56:49 [DEBUG] train episode 2600: reward = 84.00, steps = 84\n",
      "22:56:49 [DEBUG] train episode 2601: reward = 148.00, steps = 148\n",
      "22:56:49 [DEBUG] train episode 2602: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2603: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2604: reward = 133.00, steps = 133\n",
      "22:56:50 [DEBUG] train episode 2605: reward = 167.00, steps = 167\n",
      "22:56:50 [DEBUG] train episode 2606: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2607: reward = 130.00, steps = 130\n",
      "22:56:50 [DEBUG] train episode 2608: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2609: reward = 146.00, steps = 146\n",
      "22:56:50 [DEBUG] train episode 2610: reward = 190.00, steps = 190\n",
      "22:56:50 [DEBUG] train episode 2611: reward = 174.00, steps = 174\n",
      "22:56:51 [DEBUG] train episode 2612: reward = 127.00, steps = 127\n",
      "22:56:51 [DEBUG] train episode 2613: reward = 188.00, steps = 188\n",
      "22:56:51 [DEBUG] train episode 2614: reward = 200.00, steps = 200\n",
      "22:56:51 [DEBUG] train episode 2615: reward = 146.00, steps = 146\n",
      "22:56:51 [DEBUG] train episode 2616: reward = 192.00, steps = 192\n",
      "22:56:51 [DEBUG] train episode 2617: reward = 200.00, steps = 200\n",
      "22:56:51 [DEBUG] train episode 2618: reward = 195.00, steps = 195\n",
      "22:56:51 [DEBUG] train episode 2619: reward = 192.00, steps = 192\n",
      "22:56:51 [DEBUG] train episode 2620: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2621: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2622: reward = 170.00, steps = 170\n",
      "22:56:52 [DEBUG] train episode 2623: reward = 178.00, steps = 178\n",
      "22:56:52 [DEBUG] train episode 2624: reward = 172.00, steps = 172\n",
      "22:56:52 [DEBUG] train episode 2625: reward = 165.00, steps = 165\n",
      "22:56:52 [DEBUG] train episode 2626: reward = 131.00, steps = 131\n",
      "22:56:52 [DEBUG] train episode 2627: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2628: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2629: reward = 183.00, steps = 183\n",
      "22:56:53 [DEBUG] train episode 2630: reward = 144.00, steps = 144\n",
      "22:56:53 [DEBUG] train episode 2631: reward = 114.00, steps = 114\n",
      "22:56:53 [DEBUG] train episode 2632: reward = 200.00, steps = 200\n",
      "22:56:53 [DEBUG] train episode 2633: reward = 169.00, steps = 169\n",
      "22:56:53 [DEBUG] train episode 2634: reward = 146.00, steps = 146\n",
      "22:56:53 [DEBUG] train episode 2635: reward = 128.00, steps = 128\n",
      "22:56:53 [DEBUG] train episode 2636: reward = 200.00, steps = 200\n",
      "22:56:53 [DEBUG] train episode 2637: reward = 67.00, steps = 67\n",
      "22:56:53 [DEBUG] train episode 2638: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2639: reward = 158.00, steps = 158\n",
      "22:56:54 [DEBUG] train episode 2640: reward = 177.00, steps = 177\n",
      "22:56:54 [DEBUG] train episode 2641: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2642: reward = 151.00, steps = 151\n",
      "22:56:54 [DEBUG] train episode 2643: reward = 59.00, steps = 59\n",
      "22:56:54 [DEBUG] train episode 2644: reward = 177.00, steps = 177\n",
      "22:56:54 [DEBUG] train episode 2645: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2646: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2647: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2648: reward = 132.00, steps = 132\n",
      "22:56:55 [DEBUG] train episode 2649: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2650: reward = 178.00, steps = 178\n",
      "22:56:55 [DEBUG] train episode 2651: reward = 149.00, steps = 149\n",
      "22:56:55 [DEBUG] train episode 2652: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2653: reward = 187.00, steps = 187\n",
      "22:56:55 [DEBUG] train episode 2654: reward = 179.00, steps = 179\n",
      "22:56:55 [DEBUG] train episode 2655: reward = 162.00, steps = 162\n",
      "22:56:55 [DEBUG] train episode 2656: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2657: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:56 [DEBUG] train episode 2658: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2659: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2660: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2661: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2662: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2663: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2664: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2665: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2666: reward = 149.00, steps = 149\n",
      "22:56:57 [DEBUG] train episode 2667: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2668: reward = 150.00, steps = 150\n",
      "22:56:57 [DEBUG] train episode 2669: reward = 176.00, steps = 176\n",
      "22:56:57 [DEBUG] train episode 2670: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2671: reward = 158.00, steps = 158\n",
      "22:56:57 [DEBUG] train episode 2672: reward = 188.00, steps = 188\n",
      "22:56:57 [DEBUG] train episode 2673: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2674: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2675: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2676: reward = 167.00, steps = 167\n",
      "22:56:58 [DEBUG] train episode 2677: reward = 184.00, steps = 184\n",
      "22:56:58 [DEBUG] train episode 2678: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2679: reward = 170.00, steps = 170\n",
      "22:56:58 [DEBUG] train episode 2680: reward = 135.00, steps = 135\n",
      "22:56:58 [DEBUG] train episode 2681: reward = 143.00, steps = 143\n",
      "22:56:58 [DEBUG] train episode 2682: reward = 169.00, steps = 169\n",
      "22:56:59 [DEBUG] train episode 2683: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2684: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2685: reward = 163.00, steps = 163\n",
      "22:56:59 [DEBUG] train episode 2686: reward = 195.00, steps = 195\n",
      "22:56:59 [DEBUG] train episode 2687: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2688: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2689: reward = 152.00, steps = 152\n",
      "22:56:59 [DEBUG] train episode 2690: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2691: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2692: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2693: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2694: reward = 135.00, steps = 135\n",
      "22:57:00 [DEBUG] train episode 2695: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2696: reward = 122.00, steps = 122\n",
      "22:57:00 [DEBUG] train episode 2697: reward = 155.00, steps = 155\n",
      "22:57:00 [DEBUG] train episode 2698: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2699: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2700: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2701: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2702: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2703: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2704: reward = 184.00, steps = 184\n",
      "22:57:01 [DEBUG] train episode 2705: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2706: reward = 164.00, steps = 164\n",
      "22:57:02 [DEBUG] train episode 2707: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2708: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2709: reward = 154.00, steps = 154\n",
      "22:57:02 [DEBUG] train episode 2710: reward = 173.00, steps = 173\n",
      "22:57:02 [DEBUG] train episode 2711: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2712: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2713: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2714: reward = 171.00, steps = 171\n",
      "22:57:03 [DEBUG] train episode 2715: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2716: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2717: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2718: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2719: reward = 188.00, steps = 188\n",
      "22:57:03 [DEBUG] train episode 2720: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2721: reward = 152.00, steps = 152\n",
      "22:57:03 [DEBUG] train episode 2722: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2723: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2724: reward = 170.00, steps = 170\n",
      "22:57:04 [DEBUG] train episode 2725: reward = 154.00, steps = 154\n",
      "22:57:04 [DEBUG] train episode 2726: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2727: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2728: reward = 159.00, steps = 159\n",
      "22:57:04 [DEBUG] train episode 2729: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2730: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2731: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2732: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2733: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2734: reward = 153.00, steps = 153\n",
      "22:57:05 [DEBUG] train episode 2735: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2736: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2737: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2738: reward = 186.00, steps = 186\n",
      "22:57:05 [DEBUG] train episode 2739: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2740: reward = 119.00, steps = 119\n",
      "22:57:05 [DEBUG] train episode 2741: reward = 178.00, steps = 178\n",
      "22:57:06 [DEBUG] train episode 2742: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2743: reward = 185.00, steps = 185\n",
      "22:57:06 [DEBUG] train episode 2744: reward = 105.00, steps = 105\n",
      "22:57:06 [DEBUG] train episode 2745: reward = 171.00, steps = 171\n",
      "22:57:06 [DEBUG] train episode 2746: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2747: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2748: reward = 194.00, steps = 194\n",
      "22:57:06 [DEBUG] train episode 2749: reward = 111.00, steps = 111\n",
      "22:57:06 [DEBUG] train episode 2750: reward = 153.00, steps = 153\n",
      "22:57:06 [DEBUG] train episode 2751: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2752: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2753: reward = 185.00, steps = 185\n",
      "22:57:07 [DEBUG] train episode 2754: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2755: reward = 181.00, steps = 181\n",
      "22:57:07 [DEBUG] train episode 2756: reward = 68.00, steps = 68\n",
      "22:57:07 [DEBUG] train episode 2757: reward = 185.00, steps = 185\n",
      "22:57:07 [DEBUG] train episode 2758: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2759: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2760: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2761: reward = 191.00, steps = 191\n",
      "22:57:08 [DEBUG] train episode 2762: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2763: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2764: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2765: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2766: reward = 176.00, steps = 176\n",
      "22:57:08 [DEBUG] train episode 2767: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2768: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2769: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2770: reward = 167.00, steps = 167\n",
      "22:57:09 [DEBUG] train episode 2771: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2772: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2773: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2774: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2775: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2776: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2777: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2778: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2779: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2780: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2781: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2782: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:57:10 [DEBUG] train episode 2783: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2784: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2785: reward = 112.00, steps = 112\n",
      "22:57:10 [DEBUG] train episode 2786: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2787: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2788: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2789: reward = 143.00, steps = 143\n",
      "22:57:11 [DEBUG] train episode 2790: reward = 173.00, steps = 173\n",
      "22:57:11 [DEBUG] train episode 2791: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2792: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2793: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2794: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2795: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2796: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2797: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2798: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2799: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2800: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2801: reward = 163.00, steps = 163\n",
      "22:57:13 [DEBUG] train episode 2802: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2803: reward = 164.00, steps = 164\n",
      "22:57:13 [DEBUG] train episode 2804: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2805: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2806: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2807: reward = 134.00, steps = 134\n",
      "22:57:13 [DEBUG] train episode 2808: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2809: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2810: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2811: reward = 43.00, steps = 43\n",
      "22:57:14 [DEBUG] train episode 2812: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2813: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2814: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2815: reward = 33.00, steps = 33\n",
      "22:57:14 [DEBUG] train episode 2816: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2817: reward = 172.00, steps = 172\n",
      "22:57:14 [DEBUG] train episode 2818: reward = 187.00, steps = 187\n",
      "22:57:14 [DEBUG] train episode 2819: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2820: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2821: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2822: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2823: reward = 198.00, steps = 198\n",
      "22:57:15 [DEBUG] train episode 2824: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2825: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2826: reward = 149.00, steps = 149\n",
      "22:57:15 [DEBUG] train episode 2827: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2828: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2829: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2830: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2831: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2832: reward = 122.00, steps = 122\n",
      "22:57:16 [DEBUG] train episode 2833: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2834: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2835: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2836: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2837: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2838: reward = 81.00, steps = 81\n",
      "22:57:17 [DEBUG] train episode 2839: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2840: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2841: reward = 141.00, steps = 141\n",
      "22:57:17 [DEBUG] train episode 2842: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2843: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2844: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2845: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2846: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2847: reward = 145.00, steps = 145\n",
      "22:57:18 [DEBUG] train episode 2848: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2849: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2850: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2851: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2852: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2853: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2854: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2855: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2856: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2857: reward = 149.00, steps = 149\n",
      "22:57:19 [DEBUG] train episode 2858: reward = 162.00, steps = 162\n",
      "22:57:19 [DEBUG] train episode 2859: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2860: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2861: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2862: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2863: reward = 121.00, steps = 121\n",
      "22:57:20 [DEBUG] train episode 2864: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2865: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2866: reward = 139.00, steps = 139\n",
      "22:57:20 [DEBUG] train episode 2867: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2868: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2869: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2870: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2871: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2872: reward = 183.00, steps = 183\n",
      "22:57:21 [DEBUG] train episode 2873: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2874: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2875: reward = 187.00, steps = 187\n",
      "22:57:21 [DEBUG] train episode 2876: reward = 29.00, steps = 29\n",
      "22:57:21 [DEBUG] train episode 2877: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2878: reward = 98.00, steps = 98\n",
      "22:57:21 [DEBUG] train episode 2879: reward = 130.00, steps = 130\n",
      "22:57:22 [DEBUG] train episode 2880: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2881: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2882: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2883: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2884: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2885: reward = 181.00, steps = 181\n",
      "22:57:22 [DEBUG] train episode 2886: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2887: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2888: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2889: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2890: reward = 117.00, steps = 117\n",
      "22:57:23 [DEBUG] train episode 2891: reward = 182.00, steps = 182\n",
      "22:57:23 [DEBUG] train episode 2892: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2893: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2894: reward = 113.00, steps = 113\n",
      "22:57:23 [DEBUG] train episode 2895: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2896: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2897: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2898: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2899: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2900: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2901: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2902: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2903: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2904: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2905: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2906: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2907: reward = 200.00, steps = 200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:57:25 [DEBUG] train episode 2908: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2909: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2910: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2911: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2912: reward = 194.00, steps = 194\n",
      "22:57:25 [DEBUG] train episode 2913: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2914: reward = 200.00, steps = 200\n",
      "22:57:25 [INFO] ==== test ====\n",
      "22:57:26 [DEBUG] test episode 0: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 1: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 2: reward = 163.00, steps = 163\n",
      "22:57:26 [DEBUG] test episode 3: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 4: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 5: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 6: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 7: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 8: reward = 197.00, steps = 197\n",
      "22:57:27 [DEBUG] test episode 9: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 10: reward = 113.00, steps = 113\n",
      "22:57:27 [DEBUG] test episode 11: reward = 29.00, steps = 29\n",
      "22:57:27 [DEBUG] test episode 12: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 13: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 14: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 15: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 16: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 17: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 18: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 19: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 20: reward = 159.00, steps = 159\n",
      "22:57:28 [DEBUG] test episode 21: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 22: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 23: reward = 54.00, steps = 54\n",
      "22:57:28 [DEBUG] test episode 24: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 25: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 26: reward = 132.00, steps = 132\n",
      "22:57:28 [DEBUG] test episode 27: reward = 118.00, steps = 118\n",
      "22:57:28 [DEBUG] test episode 28: reward = 196.00, steps = 196\n",
      "22:57:29 [DEBUG] test episode 29: reward = 199.00, steps = 199\n",
      "22:57:29 [DEBUG] test episode 30: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 31: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 32: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 33: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 34: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 35: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 36: reward = 187.00, steps = 187\n",
      "22:57:29 [DEBUG] test episode 37: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 38: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 39: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 40: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 41: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 42: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 43: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 44: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 45: reward = 165.00, steps = 165\n",
      "22:57:30 [DEBUG] test episode 46: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 47: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 48: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 49: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 50: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 51: reward = 191.00, steps = 191\n",
      "22:57:31 [DEBUG] test episode 52: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 53: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 54: reward = 125.00, steps = 125\n",
      "22:57:31 [DEBUG] test episode 55: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 56: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 57: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 58: reward = 114.00, steps = 114\n",
      "22:57:32 [DEBUG] test episode 59: reward = 120.00, steps = 120\n",
      "22:57:32 [DEBUG] test episode 60: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 61: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 62: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 63: reward = 122.00, steps = 122\n",
      "22:57:32 [DEBUG] test episode 64: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 65: reward = 137.00, steps = 137\n",
      "22:57:33 [DEBUG] test episode 66: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 67: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 68: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 69: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 70: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 71: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 72: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 73: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 74: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 75: reward = 178.00, steps = 178\n",
      "22:57:34 [DEBUG] test episode 76: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 77: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 78: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 79: reward = 170.00, steps = 170\n",
      "22:57:34 [DEBUG] test episode 80: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 81: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 82: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 83: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 84: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 85: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 86: reward = 167.00, steps = 167\n",
      "22:57:35 [DEBUG] test episode 87: reward = 110.00, steps = 110\n",
      "22:57:35 [DEBUG] test episode 88: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 89: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 90: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 91: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 92: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 93: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 94: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 95: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 96: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 97: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 98: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 99: reward = 200.00, steps = 200\n",
      "22:57:36 [INFO] average episode reward = 187.46 ± 31.36\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2deZgU1bn/v2/39OwbszDAMMMwwww7DDCyKgjIJkZAo6KJmmgkJhoTNQuuMXH5cRM1uYnR/Eg06r0uSVyiNyYqkms0cWURZN9EZBFGdgSBmXnvH1099PRUddfaVdX9fp5nnuk+XafqPbV8z6n3nPMeYmYIgiAI6UHAbQMEQRCE5CGiLwiCkEaI6AuCIKQRIvqCIAhphIi+IAhCGpHhtgEAUFZWxjU1NW6bIQiC4CuWLl36GTOXG8njCdGvqanBkiVL3DZDEATBVxDRx0bziHtHEAQhjRDRFwRBSCNE9AVBENIIEX1BEIQ0QkRfEAQhjUgo+kRURUT/S0RriWg1EX1XSS8hokVEtFH53yUqz01EtImI1hPRNCcLIAiCIOhHT0u/BcCNzNwfwGgA1xDRAADzASxm5noAi5XvUH6bC2AggOkAHiSioBPGC4IgCMZIOE6fmXcB2KV8PkxEawFUApgF4Exls8cAvA7gR0r608x8HMBHRLQJwEgAb9ttvBfZvv8oNu45gol9u6r+/srqTzG8ugvW7DqE2rI8VJXktv/2xoZmPPCPTfhSYw+M6l2ChooCAMCxE63424e7cN7wSgy+41UQAYe/aMHMwd3xy7mNCAUD2PrZ5zjvobcQChJqy/Kx9tNDKMnNxLmNPVCUE8L7W/dh75ET+M6keuw7egIPvb4ZoSBhYt+uWLXjII63tGHHgWMYVl2MV1Z9is9PtOL+C4eipZVBBNz+wmocO9naoSwja0rwwFeG4bwH38LeIydwvKUVbUqk7hG9umDVjoMYW1eKZdsOYMqACjyzdDvmz+iHnQeO4R/r9qCpVxfUlOXhj+9/gkGVRVj28f72Y9w1exAaKgrw07+uwXsf7cOUARXICBCyMgJYtu0AHvrqcDzx7jZ0yQ1h5faDKM3LxKqdh3Dw2EmMri3FzgPHsPTj/QCAgT0KsXrnIUzsW45l2w7g4LGTqtfm9D5lyA4F8NraPagtz8OW5s8BAOUFWWg+fByDK4tw9uDueOnDnVi141CHvNmhAAJEyM3MwGdHjiM/KwOja0vw2to97dsU5YQ6HXt2Yw/85YOdAIDRtSXYsPsI9h89ATMRz7NDAZxWU4KDx05i5faDAID8rAwcOd6CjAChpS3xTgMEJNqspjQXW/ceRXVJLrbtO4p+3Qqw7tPDyAkF0coMZsbJVsbI3iV476N97fl6dsnB9v3HEtpQ3zUfG/ccARE6nYeqkhzcOKUvfvbyOuw8+AXunjMItzy/qt2misIsBIiw+9AXmD2sEu9u2Yfmw8dxorWt0/4ri3Ow48AxZAYDGFNXin9uaG7fZkJDOf65oRkVhVnYfei4qp3BAKFVOVnnDa/E88t34LReJXhv66kyR8owurYE72wJp+eEgjh2shUXNVXhH+v34PtTG3DRadUJz4tdkJF4+kRUA+ANAIMAbGPm4qjf9jNzFyJ6AMA7zPzfSvrDAP7OzM/E7GsegHkAUF1dPeLjjw3PMfAkg378Co4cb8HWBTM7/XbsRCv63/5y+0OSmRHAhrtmtP9eM/+lDttH9nHL8x/iiXe34brJ9fjV4o0dtvnh9L749pl9OuUVBMEfBAOEzfecbSovES1l5iYjeXR35BJRPoBnAXyPmQ/F21QlrVPNwswLmbmJmZvKyw3NIvY0R463aP7WqlSwn+w7CgA40dKmuW00kZbG7oNfdPpt35ETRk0UBMFDBNQU08nj6dmIiEIIC/4TzPyckrybiLorv3cHEHmP3Q6gKip7TwA77TE3veHOdacgCD4n
mGTV1zN6hwA8DGAtM98f9dOLAC5XPl8O4IWo9LlElEVEvQHUA3jPPpMFQRBSh4xAckfO6wm4Ng7ApQA+JKIPlLSbASwA8CciuhLANgAXAAAzryaiPwFYg/DIn2uYubXzbgX9aLfwKcmvhoIg2EuyW/p6Ru/8C+p+egCYrJHnbgB3W7BLUEHWsBeE1CMU9Jh7R/AC4ZtCNF8QUo9pA7sl9XieiKefKhw8qj7+22n0jgISBMFbvHvzZJTmZSb1mCL6NvHM0u34/p9XGM53oqUNAQIyguZfumb/5t+m8wqC4B4VhdlJP6a4d2zizY3NCbdRmwjXcOvfMfvBRKId37GzZle8aROCIAinENH3ALFT+gVBEJxCRD+JkMXxlWqjd6zuUxCE9EJE3yacld7I6B0ZvyMIgjVE9H2BiL0gCPYgou8jyOH3CUEQUh8RfUEQhDRCRN9HqPn0F76xxQVLBEHwKyL6NuHEKJqL/v/b2NJ8xPb9CoKQvojoe5h3P9qHe19d77YZgiCkECL6NpGULlYZxCMIgkVE9G1C9FgQBD8goi8IgpBGiOibhJnx+ze3YPeh8GLletw7agHXjPDJ/qOW8guCIOhZI/cRItpDRKui0v5IRB8of1sjyygSUQ0RHYv67bdOGu8mWz77HHe9tBZX//dSx4915HgLAOD9rfsdP5YgCKmNnnj6jwJ4AMDjkQRmvijymYjuA3AwavvNzNxol4FepbUt3Go//EVYkJ3syW2TDgNBEGxCzxq5bxBRjdpvFB6cfiGASfaa5X3MaLwZ7ZbQC4Ig2IlVn/4ZAHYz88aotN5EtJyI/klEZ2hlJKJ5RLSEiJY0NydegMSraPnpF6/drZ3HKWMEwSH6dStw2wTBJqyK/sUAnor6vgtANTMPA3ADgCeJqFAtIzMvZOYmZm4qLy+3aEbySTQB98rHlmDTnsOqv1nszxUEQTCNadEnogwA5wH4YySNmY8z817l81IAmwE0WDXSr3x+vFU13UhcfBmxIwiCnVhp6Z8FYB0zb48kEFE5EQWVz7UA6gGkTESwtjbGyu0HVH/T43uPtPCNtPRXbj+YeCNBEASd6Bmy+RSAtwH0JaLtRHSl8tNcdHTtAMB4ACuJaAWAZwBczcz77DTYTRa+uQXnPvBvvPfRqSIlw1MjXbmCINiFntE7F2ukf00l7VkAz1o3y5us3RVewHzngWMoycs0vR+jFYV0AQiCYBcyI9cAqm4ZI4os6i34FCdChwvuIKJvEkvPgIi/4DNE8lMHEX2bMFIJGBm9A8gDJ7hLeUGW2yYINiKibxFj3h1p4gv+Y9H14902wdN8d3I9ZjX2cNsM3Yjom0RP63vWb/6NX762wfKxpKoQ3KQ41/yghXSguiQXRTkht83QjYi+CaJdOYnCJT/+9sed0iSAmuA3pB83dRDRTxKDfvwKbn9htam88rwJXue6SX3cNkHQiYi+TSQS5iPHW/Diip2m9r1m5yFT+QTBLhK19LsWZifHEI/ip3haIvomsTJuuc3gHXJYWURFENwi0S0r7h//IKJvER9V8IJgGj+1ZJON3yo8EX2b8NuFFwTBHvxWIYroGyDetTVy4f12kwiCEB8/zcER0TdJpGEvAi6kA3Kba+O3t3wRfZPEXmi/XXhBMEKi+SiCfxDRt0ii17qTrW24669rkmSNIAhu4Kc6UUTfJrRWzjr8RQt+/6+PkmyNICQXPSvHCd5ARF8QBEEHdeV5bptgC3qWS3yEiPYQ0aqotDuIaAcRfaD8nR31201EtImI1hPRNKcMd5tIy+aTfcfE3ykIacCr109QTfdbf56elv6jAKarpP+CmRuVv78BABENQHjt3IFKngcjC6WnMm9t3uu2CYLgKNKuAQIEdCvMxq0z+3f6zU+nJ6HoM/MbAPQubj4LwNPMfJyZPwKwCcBIC/Z5iugWfXTtfryl1QVrBCF5+GkculMQEd65eTIuPK3KbVMsYcWnfy0RrVTcP12UtEoAn0Rts11J
6wQRzSOiJUS0pLm52YIZ7sPsv1c8QTCCtPRTB7Oi/xCAOgCNAHYBuE9JV5M+1duFmRcycxMzN5WXl5s0QxAEL5BqjZ7skDFp9FOlaEr0mXk3M7cycxuA3+GUC2c7gOh3n54AzMUT9jBWImwKguB91t05AzdOaVD9Lfbp99twVVOiT0Tdo77OARAZ2fMigLlElEVEvQHUA3jPmonpwdETEj5Z8C5+aMj+5pLhtu7PD2U2Q0aiDYjoKQBnAigjou0AfgzgTCJqRPi8bAXwTQBg5tVE9CcAawC0ALiGmVOyl9Puxv7V/73M3h0Kgo34YVhywF8NbtdIKPrMfLFK8sNxtr8bwN1WjPIqWre9HR2578iwT0HwBeruXe9XihFkRq4gCJZJxUa2nS83P5010L6dWURE3wCO3tip+NQIHTh3aA+3TRAMoHdugp63/OwM78xRFdE3iYzgEYySEfTvPeMH50XQZqd+m0ahY4/CfOqt4AaNET9eatSJ6AuCkJC8zITdf66TFbK3NW2m87ogW/08hTxU4YvoGyD+LWDtonrnlhCEzlQUZrltQkLsfoZatZr6sccV944gCOlGtPBVl+TiiW+McsWGPl3zbdtfq0ZL3++eXRF9G/CDv1MQnCRaH4mAcX3KXLHj6XmjkRm0R9Z8MDXBFCL6JlCr6P1e+wuCXbgllgRCWX4WxtSV2rK/Np3uHSBxmb1Uf4joewSpNAQ/46X7tzg3ZMt+NN07Pu+BE9E3id3T0v1+IwmJycrw7+Nm5HZ3qwKIHPfO2YNs2V+2gdFAflpvwL93oRv457oKHmT6oO4YWVPithkpS6SuKcwOobGq2PL+rptUrz3u3iBe6h8Q0bcBPwSjEtwnSIRrJ/Vx2wxTeMl9o4nNNuZkBnHd5PrOh4lznByb5wo4gYi+TfjhmRCEVMYLLlI/VI4i+iaIvbB2hGTww80iWMev74SJXmajBVdu5c54yeef9qL/X+98jGXb9hvOF/0QiHtH0INU7M4SfX6tPJGT+3U1eFzylM8+Ed4PqOEwt/0lvOjX1gUzXbVD9EAQrOEFf7oXXEyJ8H1Lf+nH+/CPdbvdNkMQdOF9SfAv/boXuG2CJl56E0go+kT0CBHtIaJVUWk/J6J1RLSSiJ4nomIlvYaIjhHRB8rfb500HgDOf+htXPHoEqcPkxB5dRcSIbeIs2TZFNQs1Z9lPS39RwFMj0lbBGAQMw8BsAHATVG/bWbmRuXvanvMTB5tbYyn39uGEy1tuvPYUYlLfP40IJUvcVTZvHAvXza6l+5tM4MBdC/Kbv+esNNapXgeasgnJKHoM/MbAPbFpL3KzC3K13cA9HTANld4YcUOzH/uQzz4+iZD+az68lra9Fcygn/xkzhY5ftTG5CX6Y6ffXDPIt3bbrh7BvKzzHdvEqIqCg0Z8NJ1t8OnfwWAv0d9701Ey4non0R0hlYmIppHREuIaElzc7MNZtjDwaMnAQD7Pz9hKJ/VIVlfnBTRF1KLayfVY/VPY50E3oRI/bMevCToerAk+kR0C4AWAE8oSbsAVDPzMAA3AHiSiArV8jLzQmZuYuam8vJyK2bYwrETrQm38dJYW8F/+GFkhxH6dTPfcerlOEQJI2b6XAZMn3kiuhzAOQC+wspAdWY+zsx7lc9LAWwGYE/wCof5wTMrNH9bvm0/Wlrjt8RT7YEWBCcZ0KPQ0WHSyRRm0vgcjZfm8pgSfSKaDuBHAM5l5qNR6eVEFFQ+1wKoB7DFDkOdZu2uQ6rpq3cexJwH38LPX12vmddD11NIAsOqzQfzStWmgR7hi8Zrz0y8Rtuo3qkVJC9h7wURPQXgTABlRLQdwI8RHq2TBWCR0lP/jjJSZzyAnxJRC4BWAFcz8z7VHXsMrREHnx0J+/bX7DzUvugxwV8z8ARv4IFBLaZJdLv7/XGId22evGp03PVytfKeN6wSzy3fAQDoa8EVZjd6Ru9czMzdmTnEzD2Z+WFm7sPMVbFDM5n5WWYeyMxD
mXk4M/+P80VwlqByRdviqDyRvx9oITn0KMrxvTj+59xGW/bjp+clGCBkxumDYNbo74sqY79uhVhx+1QHrDOOd3tTXCL20gWUCxdvRKW0+oVEvHvzZFSX5rpqQ79uBZjQYG3QRF6munPAioZHj5GPZsmtZ1nYa3ysDNEMBrQH6sebo1Bk04peVklr0f9kX3t3hOZNG7mI0UunySgewSgVherClkxG15aajk/jVMP8/VvOwqIbJqj+VpafZXq/8Z7RJbeehbdummR636GYhdf99NYCpLno7zhwrFNa7PWL1OrM3N7aD/jtKgsCgItHVpvOm6iZY3QWbqQNVV6QZanVrZfospflZ6Ew2xutbjdIa9GPFm+te7bdvcOn/PoBImntC77DjraK1j68NCQR6DwaJzuUHKnzQ3MwrUVfTwwNiurIbWv326nsy17ThBTFzfuEkCTXpI5COlkBAZ3LmWgejRfiBSWLtBZ9PUS39CNVQmf3jrdaOYLgFNEN+mifu99Fs8blTvZkktai33FCCXVKA04JfFvbqZa+Wud9OvCDaX3dNsH3uNk8YNgz0uzxK0bi9nMG4Btn9Fb9PVmTs+Idx+j+f37BUAt2kKnrGgq6IyRpvXKWnsZJpCM37N4JX9pvP7EMfbrmO2maJ5nQUI6fv6I9Mzkd6JUiLcL5M/phSGURLvn9u4byEQHjG8oxvqFc04/vxffeRG4tq53JXXIzAQB5WfpGRz12xUjUluVZOqZZ0lr01doKnX364f+rd54K03C8pa3DdyF9aKgowGdHjrtthmVqSnMxtk+ZpX1YcenY49On9ib9M1ePsX3/Rvjh9L7oXZaLaQO76dre6nwJK6S3e0clnOpP/mdNh230DM9k9r9PU9DHr+YOc9sEz5OsJyH6OE01HePjxL6EOB0QMTsUxKVjanyhA2kh+oe+OIk5D/4bH332eYd0PZfHB9dQSCI5FhcF8c7tZHRcfXz3iHfKJSQiLUT/tTW7sXzbAfxq8cYO6XbVys8u22HLfoTU4+/fPQO3nzPAbTMARIZsegO9Ha1nD9Z2l2QY6Ah1cqiq3xqGaSH6Wtg1yuC1tbst2+IH/HZze4H+3QtxxemnRrm4PXongtFr6Zbb4qtx1rr97VdHANCO3SOok9YduYnu49+/uaVTnA1BEOKjp4Kwow6pLgmPpNLlphUHVDspLfrLt+3H6p2HkBvjh2Vm3L9oA4b0jL8Yxl0vrXXSPN8hD0764laYhXj3XLzKxWNRITxFSjdj5zz4Fm79y6pON8CHOw7i1//YhO89vbw9zQ+97oJgFbNiGMmm9ZgYXkzcRlF249m9/0Lzk7ncJqVFX4vIKjjHW7SD5Bu9kaTOEPyE3berGy1rI2WwuyP3vOE9MV0Zk++3Rz+h6BPRI0S0h4hWRaWVENEiItqo/O8S9dtNRLSJiNYT0TSnDLdCRNDjBmwyeBfL66R/uW5yfdKO5b5AeONGdTrgmqCNnpb+owCmx6TNB7CYmesBLFa+g4gGAJgLYKCS58HIQulegGL+Rwu13D/pyw1TGkzle/ZbYzG2rtRma7yNlo+9w0RHO48XZ2dG+pikP+oUetbIfQNA7OLmswA8pnx+DMDsqPSnmfk4M38EYBOAkTbZ6ijSakiMnCPreKOdbRwvv8mqh0j3sMEuY9anX8HMuwBA+d9VSa8E8EnUdtuVtE4Q0TwiWkJES5qbm02a4RxGffrpMFZfRD91MLzSVXtGrf1ZMscSbh3brxWL3R25aqdf9cww80JmbmLmpvJyZ4MPaQVRi5vHYNPmwNGThrYXhFTCy28CgH8F2gnMiv5uIuoOAMr/PUr6dgBVUdv1BLDTvHnOEm+G4ppdxqJoSiNY8ANeF+dYtJ6rrQtmui7kfu0nMCv6LwK4XPl8OYAXotLnElEWEfUGUA/gPWsm2k/kYmXEWQ3l/a37k2WO4Ft8pqBRGJWryJuvVj61iLV2oGcClpr4JiPKptuVjln0DNl8CsDbAPoS
0XYiuhLAAgBTiGgjgCnKdzDzagB/ArAGwMsArmHmVqeMN0zMdQ/auATW4eMttu1L8A+GY9g4Y0bSsGsilNYbx5QBFYb35XZfk9vHN0rCMAzMfLHGT5M1tr8bwN1WjHKMmIXNOw7Z9NmVcwE5R7GQYXdJZPPasjxsiQn1LRjD7Xa231xlEdJyRm4Ev76euYXfWjReJivkzvQVq3e8XbeArS4g+3aVFqSX6Me5O1rapAIQjGNWvNwWKicrcDvfCO2cNW83fm0EpZfoxxB9z6w1OFpHEPyIWaFMlC1a6JP9Bu1WsERx7/iISx9+F4D7PkEhfXHj3rNDpJKtr/EO553n119N/rQQ/e//eUWH7/sjE6m8c9cIaYK/5OEURlrvye7w17UCnjzs7aSF6Av24FfBsotbZ/Z32wRL2DGW3ksjuPzqXnGbtBb9E63a8fSFzvi148ouqpTl+boVhtdkzTExAsdtnXL7+I6g4770UmXlNim9XGIscuEFO/iPLw/B1IEVGNCj0PQ+3L4TjT4LCTtyDRaoNC9T13bx96ttVDLeAvxagaZsS//2F1appk//5RtJtkRIFSL6U5gdwpxhPa3tywXVZ7YuhnbZfe8F1pcbPBWGwV389gacsqL/+Nsfq6av+/Rwki0RUgW/tuzswEhloUcEi3P1tfT1Ha/zAf0mxMkkpUT/jhdX40/vf5J4Q8FX9CrNddsE23GrE9KqGPpFS6WTV5uU8uk/+tZWAMCFp1XF31DwDXOGVWLFJwfcNgOAuuAZ9Y27LZpeF0OVNrvmtlaL8uRVo7B9/zGLe/EfKSX6Vjn0hSyEEh+3Jct7GB3/7abmdmjlG7yUdoxzz8/KwJEE0WiNHCWeT1+PvWPrygwcTfv4fiOl3DuJSHQjLPznliRZ4k/c8JMSgME9i5J/YIdxb4k/i1iwOzfT+BBXP/jmfWBiB9KqpZ+oZj520juh/wUFAoIeeaxUOwxN2ubW6B038rqBV4Zn/2h6P+RluRNRVYu0auk/v3xH3N8f/tdHSbJE8CPJiuo4sa+za0ZbIZ6YzhjUDQBwz3mDk2JL5M1drQL1StiFb51Zh8vG1LhtRgdSsqX/r42fuW2CIJjmP84fgpH3LHb0GE60gx/66oi4v+uRYSN2xVsuUdDGtOgTUV8Af4xKqgVwO4BiAFcBaFbSb2bmv5m20ARfVaJoCoKd2BnC1y13ienQyjbboRc9Z9w9v7833iaMYlr0mXk9gEYAIKIggB0AngfwdQC/YOZ7bbFQvz3JPJyQJAjkmVd1v9Mx4Fry+yKkPe4N7PLpTwawmZnVp8EmgZdXferWodMGtx7aVJT8eOJZlBtKniF6cW0yWZxx+nFsivyWp4wYOrexh51m+Rq7RH8ugKeivl9LRCuJ6BEi6qKWgYjmEdESIlrS3Nystokhbv2LeqwdwT7cWqHIK6hOznLglGRlBLF1wUzb92vLIipa6TpOhJ7DX9Ckf2KlnjfAuq752LpgJhqrinXv1yh+ey4siz4RZQI4F8CflaSHANQh7PrZBeA+tXzMvJCZm5i5qbzc+miFvZ+fsLwPwZt45ZFSkxizQuq3zkctgT1nSHdbjzNlQIXhPH4TXbexo6U/A8AyZt4NAMy8m5lbmbkNwO8AjLThGKrsF6FPec6otzZrMt1p6tUFDRX5ndLNyqSXBDZehVvZJQcAMLux0pXjexk7hmxejCjXDhF1Z+Zdytc5ABzzu2zbd9SpXQseYUxdKd7YYN39Zwd2uneS1TntIY02JZK6Ru+opJXlZ2Hj3TOQEfDQCfAIllr6RJQLYAqA56KSf0ZEHxLRSgATAVxv5Rjxj+/UngUv4XaDqqmXareUKZJ9y8bG0DfbOtXK5/a1iUcoGHD0zcSv+mNJ9Jn5KDOXMvPBqLRLmXkwMw9h5nOjWv22E/DrWfcpbpxtM/Fa7CYrZN/EdSMiufon02w7rl1oPXJujqd3LY6Rl2u8OKRVGAbBfxRke2f4op3ioqcjNy/L
uveVyB67vahvXhFdvzU9fS360tJPLm6dbrsOq9ahqYfIfWanyLh965o9fmy2gco6wT2KcxLmNeXTj2Nnl7xwg2B4tX3uNyNEor92K8p25fhm8XXsnYCvqywh2TzzrbHYd+QEzrz3dbdN6cTZg7vhbx86P8HQaAdyVUl8Mb96fB3G15djUGXyw1/37JKLl793BmrLzFXmVvnOpHpMGVCBgT38Ffrb17Lpt7HOgjnsamAXZodQU5ZnOr+TrfPqEvN2xUPrGdH77PzPtacD0A5zEgiQo4KfyM5+3QqRmWGPjP34SwMMbR8MkO8EH/C56MtoLCEZODECJFZDnapQrAwN7V2W12kB82S7pdRi0VfqcCWZoapL6q3FrIavRd9LE0XSgXR/s3LydvP6mbXnbcv4XmrL8/GHr5/W/v1P3xyDv1wzzhZr0hVf+/RF871JXmYQn59InVXInLjNYu9dx4YzxlhvtjO6oiDcWZkdSv4Q2ol9u7Z/Htm7JOnHTzV8LfoyesebDOlZjLe37LVtf25f5WTcZsl+izJapp9dMMSGTku3r6QA+N2947YBaYbd4tdd51A3t4djO9LSj/1u8CCja821ePW09O+9YGintMLsEM4f0dPUMaOObjG/YAfS0hdsJ3rtUq9MoPEDj18x0hOxYoZVh8MQu2+J4AT+bunLXelpfnlRY9zf/VIhJMNMAjC+oRxj+zgXVZTB8szEIahUuBnB1D5Jvm7pyw3sTSJvYOUFWehRlI2dB79w2SJ7sNPv3qkicTIwWJTdfqlo3WB8Qznmja/FvPG1bpviKL5u6Yt7x5vce8FQXDamF0b1Lo27nVy+Uxg5FffMGWxIvNXG6keOpxbQzqmKwesVTjBAuPns/ijLz3LbFEcR0Rdsp0dxDn46a1D767LQGStn5pJR1aYOpLbIvN0rXwnex9eiL5rvb7ze8ksmSb+XleOpXYNUC4EsdEREXxDcwMM3b7q6d9IFf4u+DCoTkkA8sbIrFIhT93L0XlV9+/EOK49XSmJ1ucStytKIHxDREiWthIgWEdFG5b9jwa7FZZxczOhbSjXuVMqvFn2yVk8kz5h8zgVciz96R1rf6YcdLf2JzNzIzE3K9/kAFjNzPYDFyndHkI7c9OArRtO3kawAABK7SURBVDouPUBpfmbijWJIdCdPaCjv8N2IVquP3pFnJ11xwr0zC8BjyufHAMx24BgAPO0WFWykqaYEWxfMdO34l43pBeDUKlG2EHPzJrqXv2w5BEJk9I4enGn+y0uFN7Aq+gzgVSJaSkTzlLSKyGLoyv+uahmJaB4RLSGiJc3NzaYOLqGVnSMZp9ZKrPdkMrl/BbYumImuBZ1jBdnm00+wH7uWNxQEq6I/jpmHA5gB4BoiGq83IzMvZOYmZm4qLy9PnEEF0XznuHik/S6VG6Y02L5Pp/nz1WPi/q61olQiLN+6jtaXzncqC+5hSfSZeafyfw+A5wGMBLCbiLoDgPJ/j1UjtRCffkfuMLjcW7JJ9U7DaQMrHNu3Hecu+s1KHp30xbToE1EeERVEPgOYCmAVgBcBXK5sdjmAF6waqYWM3nGGHjpDHhslVmhmN1YCAEIaAa6+NLSHavr6u6bbapcVIm6Z313WhCvG9bawH6MZjGwatbGuykN8+qmMlZZ+BYB/EdEKAO8BeImZXwawAMAUItoIYIry3RFkBEJH7PIvv/6Diba1yqP3E2vdj6b3w6qfTENupnrcv19c2DmuOwBkZSR/9aZEhIJk6PzHnt6kL6Ji0zaC/zAdZZOZtwDo9FQy814Ak60YpRd5RXWGzAz1toDViiD2egUChPws9VswMxhARtBYm2RsXSne2mxsxS673hat1pGJ7uVOv1sMuCakL74OrSw+feew69RG70erJayWbFSo3rt5MgpzQuh328sYXVuCd7bsS5hn+W1TEHQpdnrsUeu75jtznJiAa3pwLgyDVD5ewNeiL5rvHE64d5yka2G4H+LNH05EWX4W+t/+
sua2799yFoIBQpc845OotLByK756/Xg0VBTYZosW0uIXAJ/H3pGWvr/QulyNVcWdtzUpo1UluchRiREfTXlBFkpsFHwg7G0x0pKNPhdOC36HMAw6hN+5KJvxd5wT8l5fjRlundnfbRPi4mvRF8nviNN1YCIxTYSWkP/mkuE4b3ilpX27hdlT7ranw2sTGxddPx5v/HCi22bYQnGuvQ0Ku/G36Hvrvk15tDpd46FnbHheVgaGVBYZ2u+L144zbIte3po/CXk6K7ho7bYyeidZ6Kls3PDp11cUoLwgtVes8go+9+mL6jtH5wc02+Lrt51Xa0jPzi4hu+hRnIN+3Qux9OP9zr1NWlRWu/zz8faSas/XazeMx4kW6dfwtegL3ifapeNHDUkkEabdOybzmTuWv4TutRsmYP/RE7bvt09X5zvL/YCIvuAofhOcVEOrH8XLFXAfh4avCmFE9FMIDz/HAKzPOv3rd07H7kNf2GSNPhJZHOnnCAUCyFDG/OdlZeCZq8fgeEubZj693p1+3Qqw7tPDnc6dkf6VjqN3hHRHRF9IGtGty8evGGk4/6DKIgwy2OGrlwcuGYZrn1xuON/dcwZhQI9CjOtTCgCYP6MfLhjRE6X58Tsl23Sqfm15HtZ9erjTG9O9FwzFn5dux4K/rzNsM+D9BoLgHL4eveN1nrpqdHIPmOCd/dGvn5YkQ04R0gilML5BPZx2+1KDBlRp4aUjjJrViXOGdAzuFgkCl8gNUpybiWsm9gFROPbO1RPqEgo+oL+lr/V2VJqfhasn1Gnm0xorrrdcQuoiou8gY+pKMbauNHkHjKMkt50zALVlyfeVRrfo9YwGOa2mJPxBhyj+59xGLLx0BKYO7GbWPE1+cVEj5o2vxbAqx5Z41oXZPpEB3dVX+dJTLqdcQOniWvJ6fSqinyZcebqxsL92jdWuLc/H18bWJNzOzPDAWY2Vjgg+AHQvysHNZ/dHwOb43ePqSvGVUdVYcP5gQ/mM9odoXT4j5fK6eHmVmUO6Y+5pVbjlbG/OzBWffirh8Xd2Q/rp7aKYJiMYwN1zBmNPkjqkiYBfXzIMv319M+o9MmTxJ+cONJznB9P6orok1wFr7Cc7FMSC84e4bYYm0tIXTHPOkO66tot0WsbTcaciMJ5RX+bIfq2ix+9vFw0VBbj/okYEPbLq0Lkai+PE45qJfTQX1RGMIaKfQqg90mf1r8DPv+xMq0MtUJoaET13Y4bnf105KunH1INXBFhIP0T0DWBGs26c6u5i4AsvHYELmqps2993J9ebzhvv/Hlhyv/XxtbgIhvPlR18+8w+KMvPNDwgYEjPIlQUZiVcjP4bZ/RGqc0RRwVtzh3aI+6oq2Rg2qdPRFUAHgfQDUAbgIXM/J9EdAeAqwA0K5vezMx/s2qoFzDjgRjRq8R+Q3yGkREods3gnTqgAkU5IUN57jDha3aaQZVFWHLrFMP5CrJDePfmsxJu169bIZbeNgU1818yY55gkF9dPMxtEyx15LYAuJGZlykLpC8lokXKb79g5nutm+csjVXF+OCTA7q3b6jIx4bdR0wfLysjEHeWph8w0yBvd+/Ya0pcFl7W1P55QkO5bcsipgN2d6/UludhXF0ZXlyx094dC6Yw7d5h5l3MvEz5fBjAWgCeC4peWZyj+dvI3tqt8GHVxdi6YCaGRvmttWaDjqnV9+r9/84bjK0LZura1m30PPgTNCZYddpX5IOOGsOJBcIfu2Ik/vB14zOA/Ybe66FG96LsTml2edz+ceOZuHP2INv3K5jDFp8+EdUAGAbgXSXpWiJaSUSPEJHqLBAimkdES4hoSXNzs9omlnnthglYfOMEzd/jjRi5ZGQ1AOCFa8bhji8NAKAe72TLPWfjqXmdZ96OUqlQ9IwpH9enFHXleQm3UyPZD1O9yopPquvdGmjpS4A2d3j7pslumyAkCcuiT0T5AJ4F8D1mPgTgIQB1ABoB7AJwn1o+Zl7IzE3M3FRebr6FEi/w
FFH8FmtbnN+iOz8jE1nU4qVoCe0fvzmmU5reIFle6NRMxHUaHboNKmPBpw6sAACM6OXu7FZBECyKPhGFEBb8J5j5OQBg5t3M3MrMbQB+B8DR9+pVP5mmbV+CvFoVQuzY7ogIM6OTe8aMQGsJphF6dtF2W0UTbV6sqZkacXHsZmLfrti6YCb6a4QGEOxh+iBnZicLqYXpp57CavcwgLXMfH9UevSMnTkAVpk3zxrhIFjav2u5En46a1CH74MVX/6YmGFz6++arsuOzGDAE37M2EruyatGYcNdM3TlzcqIf6tElhc0KzyR8+NGp28qsO7O6Zh7mreGmwrexMronXEALgXwIRF9oKTdDOBiImpEuP9uK4BvWrLQAgGKPwlGq6VfFdOKbqwqxorbp6Iot+MQwKwMfcsHrrxjqq7tkk0gQMhUxHxsXSne2rxXc9t75sSPFdMlLxNv3XQGCkysowu4v1C4XxhZoz74wOpSlkL6YFr0mflfUG+QeWZMfmZGAKFgAEtvPQsj7npNd76ASrM8VvCNEPtADuzhjJsj0cgXrbeN5bdNQW5WEH1vfbnTb/27F2LtrkOo01jNKLLIR4DI8Lh4NTIUl9NZ/Sss7yvVWH7bFOToXLDdLA0VsmpVqpPSAdciPuvoOCeF2Rk49EULgLBgqZHIFTO0ZxFWbD9o2q5pA7shNzOIoyda0bssDx999rnpfVkhUswuKjMyI66vy8f0wpi6UvQqVR9R9Mu5jZj+yzdtCyuQESC8NX8SSvNllmgsatdJjcEmF5p575bJyMvMwCf7j5rKnwin4isJxkht0VfxQ4/sXYLX1u7Bc98ei2FVxbj31Q347MjxDtsk6px98qrR2HvE2sLN1SW5WPfpYQyqLHJN9GMfwfdvUZ/BGS34vZVFTiKLndjVGRx9ynvEmVshdKQ0LxN7Pz91L77xg4koMVlhdi3oOFbfiTkTTu5X0Efaif6vLx6OrXs/bx9Jsuj68Rh256JO28UjLysDeSZ917FcPaEW2/cfxfJt4ZnBpPORGFxZhO37j3VIixbO4dXFWLZN/2xjACgvSBz58dyhPVBdkqs72JqgzTs3Tda9bKIWr90wAQeOnWz/Xl3qj/DDgnukdMA1tVZoTmaww9BBva/MThEgwtCeHQW0QZn09MeoSV+Lb5yAt+ZPav9+34VD21/juypiXRblxnr0ipF48dpxlsf8x2YnIgyr7tJpv1Zf3SNlqNQ5FDUV6FaUbfmtpkteZvvbl13kZYYbNH00+nHMEnnugkFp6btJSrf0vTzJKZ5G/uzLQ3DJqGqMigrvUFfe8QHMzczA8OpifLjjIL45oQ69y3IxsW9XvHr9eGQGAyjMDmFITGUSO+wy3tnpVhQWo+Lc+JWiXed42sBuePjyJpzZt6st+xPMU1WSiye+Mcr2t7mFlzVh7a5DuicpCs6Qcme/uiQX2/Y50xFlJ5GO0ljN7FqYhbysDIzro3/xjyABk/qFR7s0qIRGOLXvbPzha6fhzr+uwZYE/QjfmdQHfSsKMHWAvaNo/vqd05Ed6vwGRkSYLCN2PIOR+08vRTkhjNYZp0pwjpQT/QABb/5wYtI7R//6ndPR2saY9Zt/69r+1CQkQrEyHHRsXSnujJkYFsviGydgz6Fwx3OR0go30r8wsV9XPPHuNmz57PO48wxCwQBm6lgZK6S8qsd2AmqhFbROEITkkHKiX5KXiaqSXFQZWE/zpetOx+vrm/HzV9abPq5RMYv0JYSChG+f2QcleZn4yqheCYc+1pXnt7t6rplYh7L8TJw/vKehY993wVC8svpTDLBhvkDPLrm474KhOLOv+fhJgiAkj5QS/TtnD8K0gcZdBAN7FGFgjyJLoh/hxWvHtbfE4/GbS4bj5VW7UKsI+GVjagwfKysjaCpfUW4IF9o4Zf/8EcYqHUEQ3CMlRP/Zb43FroPHcM4Q7YWTf3XxsIRByv523RlYs+uQJVtiO0+1KC/IwqUmBFsQBMEKKSH6
4ZC98cP2njtUu0KIMKBHoS0uDzu5Z85g9O+u3TkrCIJghJQQ/VTmklHVbpsgCEIKkdKTswRBEISOiOgLgiCkESL6giAIaYSIviAIQhohoi8IgpBGiOgLgiCkESL6giAIaYSIviAIQhpBXli3koiaAXxsYRdlAD6zyRwvIOXxNlIe75NqZdIqTy9mNhTt0BOibxUiWsLMTW7bYRdSHm8j5fE+qVYmO8sj7h1BEIQ0QkRfEAQhjUgV0V/otgE2I+XxNlIe75NqZbKtPCnh0xcEQRD0kSotfUEQBEEHIvqCIAhphK9Fn4imE9F6ItpERPPdtkcvRLSViD4kog+IaImSVkJEi4hoo/K/S9T2NyllXE9E09yzvN2eR4hoDxGtikozbD8RjVDOwyYi+hURxV8V3kE0ynQHEe1QrtMHRHR21G+eLRMRVRHR/xLRWiJaTUTfVdJ9e43ilMmv1yibiN4johVKeX6ipDt/jZjZl38AggA2A6gFkAlgBYABbtul0/atAMpi0n4GYL7yeT6A/1A+D1DKlgWgt1LmoMv2jwcwHMAqK/YDeA/AGAAE4O8AZnisTHcA+L7Ktp4uE4DuAIYrnwsAbFBs9u01ilMmv14jApCvfA4BeBfA6GRcIz+39EcC2MTMW5j5BICnAcxy2SYrzALwmPL5MQCzo9KfZubjzPwRgE0Il901mPkNAPtikg3ZT0TdARQy89scvnMfj8qTdDTKpIWny8TMu5h5mfL5MIC1ACrh42sUp0xaeLpMHOaI8jWk/DGScI38LPqVAD6J+r4d8W8CL8EAXiWipUQ0T0mrYOZdQPgGB9BVSfdLOY3aX6l8jk33GtcS0UrF/RN51fZNmYioBsAwhFuSKXGNYsoE+PQaEVGQiD4AsAfAImZOyjXys+ir+a38Mv50HDMPBzADwDVEND7Otn4uJ6Btvx/K9RCAOgCNAHYBuE9J90WZiCgfwLMAvsfMh+JtqpLmufIAqmXy7TVi5lZmbgTQE+FW+6A4m9tWHj+L/nYAVVHfewLY6ZIthmDmncr/PQCeR9hds1t5VYPyf4+yuV/KadT+7crn2HTPwMy7lQezDcDvcMqt5vkyEVEIYXF8gpmfU5J9fY3UyuTnaxSBmQ8AeB3AdCThGvlZ9N8HUE9EvYkoE8BcAC+6bFNCiCiPiAoinwFMBbAKYdsvVza7HMALyucXAcwloiwi6g2gHuGOG69hyH7l1fUwEY1WRhtcFpXHE0QePoU5CF8nwONlUo79MIC1zHx/1E++vUZaZfLxNSonomLlcw6AswCsQzKuUbJ7re38A3A2wr34mwHc4rY9Om2uRbgXfgWA1RG7AZQCWAxgo/K/JCrPLUoZ18PFES5R9jyF8Kv0SYRbGleasR9AE8IP6WYAD0CZIe6hMv0XgA8BrFQeuu5+KBOA0xF+xV8J4APl72w/X6M4ZfLrNRoCYLli9yoAtyvpjl8jCcMgCIKQRvjZvSMIgiAYRERfEAQhjRDRFwRBSCNE9AVBENIIEX1BEIQ0QkRfEAQhjRDRFwRBSCP+D7vyQL47HopGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-20:]) > 199:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
